% KDD 2024 conference paper (MAML-en-LLM). Cleaned from a repository export:
%   - author names are in "Last, First" form; acronyms in titles are brace-protected
%   - address is the publisher's location, not the conference venue
%   - copyright and event details live in their own fields instead of the printed note
%   - day/language/keywords/abstract/copyright are ignored by standard styles
@inproceedings{ab0f03ca106a44968ee55589aeb9a2ff,
  author     = {Sinha, Sanchit and Yue, Yuguang and Soto, Victor and Kulkarni, Mayank and Lu, Jianhua and Zhang, Aidong},
  title      = {{MAML-en-LLM}: Model Agnostic Meta-Training of {LLMs} for Improved In-Context Learning},
  booktitle  = {{KDD} 2024 - Proceedings of the 30th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  series     = {Proceedings of the {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  pages      = {2711--2720},
  year       = {2024},
  month      = aug,
  day        = {24},
  doi        = {10.1145/3637528.3671905},
  eventtitle = {30th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining, {KDD} 2024},
  eventdate  = {2024-08-25/2024-08-29},
  keywords   = {LLMs, generalization, in-context learning, meta learning, optimization},
  language   = {English},
  copyright  = {{\textcopyright} 2024 Copyright held by the owner/author(s).},
  abstract   = {Adapting large language models (LLMs) to unseen tasks with in-context training samples without fine-tuning remains an important research problem. To learn a robust LLM that adapts well to unseen tasks, multiple meta-training approaches have been proposed such as MetaICL and MetaICT, which involve meta-training pre-trained LLMs on a wide variety of diverse tasks. These meta-training approaches essentially perform in-context multi-task fine-tuning and evaluate on a disjointed test set of tasks. Even though they achieve impressive performance, their goal is never to compute a truly general set of parameters. In this paper, we propose MAML-en-LLM, a novel method for meta-training LLMs, which can learn truly generalizable parameters that not only performs well on disjointed tasks but also adapts to unseen tasks. We see an average increase of 2\% on unseen domains in the performance while a massive 4\% improvement on adaptation performance. Furthermore, we demonstrate that MAML-en-LLM outperforms baselines in settings with limited amount of training data on both seen and unseen domains by an average of 2\%. Finally, we discuss the effects of type of tasks, optimizers and task complexity, an avenue barely explored in meta-training literature. Exhaustive experiments across 7 task settings along with two data settings demonstrate that models trained with MAML-en-LLM outperform SOTA meta-training approaches.},
}