% Workshop paper (ComputEL 2024) on building a UniMorph dataset of
% Kanien'k{\'e}ha verbs. Cleaned up from a repository export:
%   - names are stored as "Last, First" so multi-word given names parse
%     unambiguously (the export had literal \{...\} around "Jean Pierre");
%   - proper nouns in the title are braced so sentence-casing styles keep
%     their capitalisation;
%   - copyright and eventdate hold metadata the export had placed in note;
%     the classic BibTeX styles do not print either field.
@inproceedings{c28440a477684cf19c651494a963b421,
  author    = {Kazantseva, Anna and Martin, Akwirat{\'e}kha and Michelson, Karin and Koenig, Jean Pierre},
  title     = {Fitting a Square Peg into a Round Hole: Creating a {UniMorph} Dataset of {Kanien'k{\'e}ha} Verbs},
  booktitle = {{ComputEL} 2024 - 7th Workshop on the Use of Computational Methods in the Study of Endangered Languages, Proceedings of the Workshop},
  editor    = {Moeller, Sarah and Agyapong, Godfred and Arppe, Antti and Chaudhary, Aditi and Rijhwani, Shruti and Cox, Christopher and Henke, Ryan and Palmer, Alexis and Rosenblum, Daisy and Schwartz, Lane},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2024},
  pages     = {39--51},
  language  = {English},
  eventdate = {2024-03-21/2024-03-22},
  copyright = {{\textcopyright} 2024 Association for Computational Linguistics},
  abstract  = {This paper describes efforts to annotate a dataset of verbs in the Iroquoian language Kanien'k{\'e}ha (a.k.a. Mohawk) using the UniMorph schema (Batsuren et al., 2022a). The dataset is based on the output of a symbolic model - a hand-built verb conjugator. Morphological constituents of each verb are automatically annotated with UniMorph tags. Overall the process was smooth but some central features of the language did not fall neatly into the schema which resulted in a large number of custom tags and a somewhat ad hoc mapping process. We think the same difficulties are likely to arise for other Iroquoian languages and perhaps other North American language families. This paper describes our decision making process with respect to Kanien'k{\'e}ha and reports preliminary results of morphological induction experiments using the dataset.},
}