{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T07:42:42Z","timestamp":1770018162462,"version":"3.49.0"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2010,11,3]],"date-time":"2010-11-03T00:00:00Z","timestamp":1288742400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2011,5]]},"DOI":"10.1007\/s10579-010-9132-x","type":"journal-article","created":{"date-parts":[[2010,11,2]],"date-time":"2010-11-02T14:21:39Z","timestamp":1288707699000},"page":"143-164","source":"Crossref","is-referenced-by-count":54,"title":["Lessons from building a Persian written corpus: Peykare"],"prefix":"10.1007","volume":"45","author":[{"given":"Mahmood","family":"Bijankhan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Javad","family":"Sheykhzadegan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohammad","family":"Bahrani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masood","family":"Ghayoomi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,11,3]]},"reference":[{"issue":"2","key":"9132_CR1","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1075\/ijcl.11.2.02als","volume":"11","author":"L Al-Sulaiti","year":"2006","unstructured":"Al-Sulaiti, L., & Atwell, E. (2006). The design of a corpus of contemporary Arabic. International Journal of Corpus Linguistics, 11(2), 135\u2013171.","journal-title":"International Journal of Corpus Linguistics"},{"issue":"1","key":"9132_CR2","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1075\/ijcl.5.1.05ass","volume":"5","author":"M Assi","year":"2000","unstructured":"Assi, M., & Abdolhosseini, M. H. (2000). Grammatical tagging of a Persian corpus. International Journal of Corpus Linguistics, 5(1), 69\u201381.","journal-title":"International Journal of Corpus Linguistics"},{"issue":"1","key":"9132_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/llc\/7.1.1","volume":"7","author":"S Atkins","year":"1992","unstructured":"Atkins, S., Clear, J., & Ostler, N. (1992). Corpus design criteria. Literary and Linguistic Computing, 7(1), 1\u201316.","journal-title":"Literary and Linguistic Computing"},{"key":"9132_CR4","first-page":"174","volume-title":"Corpus linguistics: Readings in a widening discipline","author":"D Biber","year":"1992","unstructured":"Biber, D. (1992). Representativeness in corpus design. In G. Sampson & D. McCarthy (Eds.), Corpus linguistics: Readings in a widening discipline (pp. 174\u2013197). New York, USA: Continuum."},{"issue":"2","key":"9132_CR5","first-page":"221","volume":"19","author":"D Biber","year":"1993","unstructured":"Biber, D. (1993). Using register-diversified corpora for general language studies. Computational Linguistics, 19(2), 221\u2013241.","journal-title":"Computational Linguistics"},{"key":"9132_CR6","unstructured":"Bijankhan, M. et al. (1994). Farsi spoken language database: FARSDAT. In Proceedings of the 5th international conference on speech sciences and technology (ICSST), Perth (Vol. 2, pp. 826\u2013829)."},{"key":"9132_CR7","unstructured":"Bijankhan, M. et al. (2003). TFARSDAT: Telephone Farsi spoken language database. EuroSpeech, Geneva (3), pp. 1525\u20131528."},{"key":"9132_CR8","unstructured":"Bijankhan, M. et al. (2004). The large Persian speech database. In Proceedings of the 1st workshop on Persian language and computer, the University of Tehran, Tehran, Iran (pp. 149\u2013150)."},{"key":"9132_CR9","unstructured":"Buckwalter, T. (2005). Issues in Arabic orthography and morphology analysis. In Proceedings of the workshop on computational approaches to arabic script-based languages in conjunction with COLING 2004, Switzerland."},{"key":"9132_CR10","volume-title":"Syntactic wordclass tagging","author":"J Cloeren","year":"1999","unstructured":"Cloeren, J. (1999). Tagsets. In H. V. Halteren (Ed.), Syntactic wordclass tagging. Dordrecht, The Netherlands: Kluwer."},{"issue":"1","key":"9132_CR11","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1093\/llc\/18.1.23","volume":"18","author":"FM Douglas","year":"2003","unstructured":"Douglas, F. M. (2003). The Scottish corpus of texts and speech: Problems of corpus design. Literary and Linguistic Computing, 18(1), 23\u201337.","journal-title":"Literary and Linguistic Computing"},{"key":"9132_CR12","doi-asserted-by":"crossref","unstructured":"Ghayoomi, M., & Momtazi, S. (2009). Challenges in developing Persian corpora from online resources. In Proceedingss of IEEE international conference on Asian language processing, Singapore.","DOI":"10.1109\/IALP.2009.31"},{"issue":"1","key":"9132_CR13","first-page":"17","volume":"20","author":"M Ghayoomi","year":"2010","unstructured":"Ghayoomi, M., Momtazi, S., & Bijankhan, M. (2010). A study of corpus development for Persian. International Journal on Asian Language Processing, 20(1), 17\u201333.","journal-title":"International Journal on Asian Language Processing"},{"key":"9132_CR14","unstructured":"Ghomeshi, J. (1996). Projection and inflection: A study of persian phrase structure. Ph.D. thesis, University of Toronto, Toronto, ON."},{"key":"9132_CR15","unstructured":"Haji\u010d, J. (2000). Morphological tagging: Data vs. dictionaries. In Proceedings of the 6th applied natural language processing conference, Washington (pp. 94\u2013101)."},{"key":"9132_CR16","unstructured":"Hearst, M. A. (1991). Noun homograph disambiguation using local context in large text corpora. In Proceedings of the 7th annual conference of the University of Waterloo, Center for the new OED and text research, Oxford."},{"key":"9132_CR17","doi-asserted-by":"crossref","unstructured":"Hodge, C. T. (1957). Some aspects of Persian style. Language, 33(3) Part 1, 355\u2013369.","DOI":"10.2307\/411158"},{"issue":"2","key":"9132_CR18","doi-asserted-by":"crossref","first-page":"331","DOI":"10.2307\/415831","volume":"70","author":"R Hudson","year":"1994","unstructured":"Hudson, R. (1994). About 37% word-tokens are nouns. Language, 70(2), 331\u2013339.","journal-title":"Language"},{"key":"9132_CR19","volume-title":"Road map for localization","author":"S Hussain","year":"2005","unstructured":"Hussain, S., & Gul, S. (2005). Road map for localization. Lahore, Pakistan: Center for Research in Urdu Language Processing, National University of Computer and Emerging Sciences."},{"key":"9132_CR20","unstructured":"Kawata, Y. (2001). Towards a reference tagset for Japanese. In Proceedings of the 6th natural language processing Pacific rim symposium post-conference workshop, Tokyo (pp. 55\u201362)."},{"key":"9132_CR21","unstructured":"Khoja, S., Garside, R., & Knowles, G. (2001). A tagset for the morpho-syntactic tagging of Arabic. Lancaster University, Computing Department. http:\/\/archimedes.fas.harvard.edu\/mdh\/arabic\/CL2001.pdf ."},{"issue":"3","key":"9132_CR22","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1075\/ijcl.10.3.04kra","volume":"10","author":"J Kralik","year":"2005","unstructured":"Kralik, J., & \u0160ulc, M. (2005). The representativeness of Czeck corpora. International Journal of Corpus Linguistics, 10(3), 357\u2013366.","journal-title":"International Journal of Corpus Linguistics"},{"issue":"2","key":"9132_CR23","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1093\/llc\/17.2.245","volume":"17","author":"K Ku\u010dera","year":"2002","unstructured":"Ku\u010dera, K. (2002). The Czech national corpus: Principles, design, and results. Literary and Linguistic Computing, 17(2), 245\u2013247.","journal-title":"Literary and Linguistic Computing"},{"key":"9132_CR24","unstructured":"Leech, G. (2002). The importance of reference corpora. Donostia, 2002-10-24\/25. www.corpus4u.org\/upload\/forum\/2005060301260076.pdf ."},{"key":"9132_CR25","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/978-94-015-9273-4_5","volume-title":"Syntactic wordclass tagging","author":"G Leech","year":"1999","unstructured":"Leech, G., & Wilson, A. (1999). Standards for tagsets. In H. V. Halteren (Ed.), Syntactic wordclass tagging (pp. 55\u201381). Dordrecht, The Netherlands: Kluwer."},{"key":"9132_CR26","volume-title":"Foundations of statistical natural language processing","author":"CD Manning","year":"1999","unstructured":"Manning, C. D., & Sch\u00fctze, H. (1999). Foundations of statistical natural language processing. Cambridge: The MIT press."},{"key":"9132_CR27","unstructured":"Marcus, M. P., Santorini, B., & Marcinkiewicz, M. A. (1993). Building a large annotated corpus of english: The penn treebank. http:\/\/citeseer.comp.nus.edu.sg\/587575.html ."},{"key":"9132_CR28","unstructured":"Megerdoomian, K. (2000). Persian computational morphology: A unification-based approach. NMSU, CRL, Memoranda in Computer and Cognitive Science (MCCS-00-320)."},{"issue":"1","key":"9132_CR29","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/09296170500500884","volume":"13","author":"T Mosavi-Miangah","year":"2006","unstructured":"Mosavi-Miangah, T. (2006). Automatic lemmatization of Persian words: Project report. Journal of Quantitative Linguistics, 13(1), 1\u201315.","journal-title":"Journal of Quantitative Linguistics"},{"key":"9132_CR30","doi-asserted-by":"crossref","unstructured":"Muthusamy, Y. K., Cole, R. A., & Oshika, B. T. (1992). The OGI multi-language telephone Speech Corpus. In Proceedings of the 2nd international conference on spoken language processing (ICSLP), Banff (pp. 895\u2013898).","DOI":"10.21437\/ICSLP.1992-276"},{"key":"9132_CR31","doi-asserted-by":"crossref","first-page":"605","DOI":"10.1017\/S0022226707004781","volume":"43","author":"P Samvelian","year":"2007","unstructured":"Samvelian, P. (2007). A (phrasal) affix analysis of the Persian Ezafe. Journal of Linguistics, 43, 605\u2013645.","journal-title":"Journal of Linguistics"},{"key":"9132_CR32","unstructured":"Sheykhzadegan, J., & Bijankhan, M. (2006). The speech databases of Persian language. In Proceedings of the 2nd workshop on Persian language and computing, the University of Tehran, Tehran, Iran (pp. 247\u2013261)."},{"key":"9132_CR33","unstructured":"Sinclair, J. (1987). Corpus creation. In G. Sampson and D. McCarthy (Eds.), Corpus linguistics: Readings in a widening discipline, 2004 (pp. 78\u201384). New York: Continuum."},{"key":"9132_CR34","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/978-94-015-9273-4_2","volume-title":"Syntactic wordclass tagging","author":"A Voutilainen","year":"1999","unstructured":"Voutilainen, A. (1999). A short history of tagging. In H. V. Halteren (Ed.), Syntactic wordclass tagging (pp. 9\u201319). Dordrecht, The Netherlands: Kluwer."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-010-9132-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10579-010-9132-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-010-9132-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T14:02:03Z","timestamp":1740664923000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10579-010-9132-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,11,3]]},"references-count":34,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2011,5]]}},"alternative-id":["9132"],"URL":"https:\/\/doi.org\/10.1007\/s10579-010-9132-x","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,11,3]]}}}