% if the proceedings is a reissued proceedings, please add the field
% 'firstpublished' to the entry below, giving the original date of
% publication in YYYY-MM-DD format.
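%
% For illustration only (hypothetical dates): a reissued volume would keep the
% 'published' date of the reissue and add the original publication date, e.g.
%   @Proceedings{CoLLAs-2022,
%     ...
%     published      = {2022-11-28},
%     firstpublished = {2021-11-28},  % original publication date (YYYY-MM-DD), hypothetical value
%     ...
%   }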
@Proceedings{CoLLAs-2022,
booktitle = {Proceedings of The 1st Conference on Lifelong Learning Agents},
name = {Conference on Lifelong Learning Agents},
shortname = {CoLLAs},
editor = {Chandar, Sarath and Pascanu, Razvan and Precup, Doina},
volume = {199},
year = {2022},
start = {2022-08-22},
end = {2022-08-24},
published = {2022-11-28},
conference_url = {https://lifelong-ml.cc/},
conference_number = {1},
address = {McGill University, Montr\'{e}al, Qu\'{e}bec, Canada}
}
@InProceedings{li2022,
title = {Energy-Based Models for Continual Learning},
author = {Li, Shuang and Du, Yilun and van de Ven, Gido and Mordatch, Igor},
abstract = {We motivate Energy-Based Models (EBMs) as a promising model class for continual learning problems. Instead of tackling continual learning via the use of external memory, growing models, or regularization, EBMs change the underlying training objective to cause less interference with previously learned information. Our proposed version of EBMs for continual learning is simple, efficient, and outperforms baseline methods by a large margin on several benchmarks. Moreover, our proposed contrastive divergence-based training objective can be combined with other continual learning methods, resulting in substantial boosts in their performance. We further show that EBMs are adaptable to a more general continual learning setting where the data distribution changes without the notion of explicitly delineated tasks. These observations point towards EBMs as a useful building block for future continual learning methods.},
pages = {1-22},
video = {https://youtu.be/iZ4cz5RQmzE}
}
@InProceedings{collins2022,
title = {How Does the Task Landscape Affect MAML Performance?},
author = {Collins, Liam and Mokhtari, Aryan and Shakkottai, Sanjay},
abstract = {Model-Agnostic Meta-Learning (MAML) has become increasingly popular for training models that can quickly adapt to new tasks via one or few stochastic gradient descent steps. However, the MAML objective is significantly more difficult to optimize compared to standard non-adaptive learning (NAL), and little is understood about how much MAML improves over NAL in terms of the fast adaptability of their solutions in various scenarios. We analytically address this issue in a linear regression setting consisting of a mixture of easy and hard tasks, where hardness is related to the rate that gradient descent converges on the task. Specifically, we prove that in order for MAML to achieve substantial gain over NAL, (i) there must be some discrepancy in hardness among the tasks, and (ii) the optimal solutions of the hard tasks must be closely packed with the center far from the center of the easy tasks' optimal solutions. We also give numerical and analytical results suggesting that these insights apply to two-layer neural networks. Finally, we provide few-shot image classification experiments that support our insights for when MAML should be used and emphasize the importance of training MAML on hard tasks in practice.},
pages = {23-59},
video = {https://youtu.be/1T4-2AwFqoM}
}
@InProceedings{ostapenko2022,
title = {Continual Learning with Foundation Models: An Empirical Study of Latent Replay},
author = {Ostapenko, Oleksiy and Lesort, Timothee and Rodriguez, Pau and Arefin, Md Rifat and Douillard, Arthur and Rish, Irina and Charlin, Laurent},
abstract = {Rapid development of large-scale pre-training has resulted in foundation models that can act as effective feature extractors on a variety of downstream tasks and domains. Motivated by this, we study the efficacy of pre-trained vision models as a foundation for downstream continual learning (CL) scenarios. Our goal is twofold. First, we want to understand the compute-accuracy trade-off between CL in the raw-data space and in the latent space of pre-trained encoders. Second, we investigate how the characteristics of the encoder, the pre-training algorithm and data, as well as of the resulting latent space affect CL performance. For this, we compare the efficacy of various pre-trained models in large-scale benchmarking scenarios with a vanilla replay setting applied in the latent and in the raw-data space. Notably, this study shows how transfer, forgetting, task similarity and learning are dependent on the input data characteristics and not necessarily on the CL algorithms. First, we show that under some circumstances reasonable CL performance can readily be achieved with a non-parametric classifier at negligible compute. We then show how models pre-trained on broader data result in better performance for various replay sizes. We explain this with representational similarity and transfer properties of these representations. Finally, we show the effectiveness of self-supervised pre-training for downstream domains that are out-of-distribution as compared to the pre-training domain. We point out and validate several research directions that can further increase the efficacy of latent CL including representation ensembling. The diverse set of datasets used in this study can serve as a compute-efficient playground for further CL research. We will publish the code.},
pages = {60-91},
video = {https://youtu.be/X74Tp_PolTA}
}
@InProceedings{ma2022,
title = {EFL: Elastic Federated Learning on Non-IID Data},
author = {Ma, Zichen and Lu, Yu and Li, Wenye and Cui, Shuguang},
abstract = {Federated learning involves training machine learning models over devices or data silos, such as edge processors or data warehouses while keeping the data local. However, training in heterogeneous and potentially massive networks introduces bias into the system, originating from the non-IID data and the low participation rate. In this paper, we propose Elastic Federated Learning (EFL), an unbiased federated training framework capable of tackling the heterogeneity in the system. EFL extends lifelong learning to realistic federated settings, makes the most informative parameters less volatile during training, and utilizes incomplete local updates. It is also an efficient and effective algorithm that compresses upstream and downstream communications with a convergence guarantee. We empirically demonstrate the efficacy of our framework on a variety of non-IID datasets and show the competitive performance of the algorithm on robustness and efficiency.},
pages = {92-115},
video = {https://youtu.be/Yp-9Qy5_aac}
}
@InProceedings{funke2022,
title = {Disentanglement and Generalization Under Correlation Shifts},
author = {Funke, Christina M. and Vicol, Paul and Wang, Kuan-chieh and Kuemmerer, Matthias and Zemel, Richard and Bethge, Matthias},
abstract = {Correlations between factors of variation are prevalent in real-world data. Exploiting such correlations may increase predictive performance on noisy data; however, often correlations are not robust (e.g., they may change between domains, datasets, or applications) and models that exploit them do not generalize when correlations shift. Disentanglement methods aim to learn representations which capture different factors of variation in latent subspaces. A common approach involves minimizing the mutual information between latent subspaces, such that each encodes a single underlying attribute. However, this fails when attributes are correlated. We solve this problem by enforcing independence between subspaces conditioned on the available attributes, which allows us to remove only dependencies that are not due to the correlation structure present in the training data. We achieve this via an adversarial approach to minimize the conditional mutual information (CMI) between subspaces with respect to categorical variables. We first show theoretically that CMI minimization is a good objective for robust disentanglement on linear problems. We then apply our method on real-world datasets based on MNIST and CelebA, and show that it yields models that are disentangled and robust under correlation shift, including in weakly supervised settings.},
pages = {116-141},
video = {https://youtu.be/3uhumivgo-4}
}
@InProceedings{metz2022,
title = {Practical Tradeoffs between Memory, Compute, and Performance in Learned Optimizers},
author = {Metz, Luke and Freeman, C. Daniel and Harrison, James and Maheswaranathan, Niru and Sohl-dickstein, Jascha},
abstract = {Optimization plays a costly and crucial role in developing machine learning systems. In learned optimizers, the few hyperparameters of commonly used hand-designed optimizers, e.g. Adam or SGD, are replaced with flexible parametric functions. The parameters of these functions are then optimized so that the resulting learned optimizer minimizes a target loss on a chosen class of models. Learned optimizers can both reduce the number of required training steps and improve the final test loss. However, they can be expensive to train, and once trained can be expensive to use due to computational and memory overhead for the optimizer itself. In this work, we identify and quantify the design features governing the memory, compute, and performance trade-offs for many learned and hand-designed optimizers. We further leverage our analysis to construct a learned optimizer that is both faster and more memory efficient than previous work. Our model and training code are open source.},
pages = {142-164},
video = {https://youtu.be/7pUiAOTiAzc}
}
@InProceedings{caccia2022,
title = {On Anytime Learning at Macroscale},
author = {Caccia, Lucas and Xu, Jing and Ott, Myle and Ranzato, Marcaurelio and Denoyer, Ludovic},
abstract = {In many practical applications of machine learning, data arrives sequentially over time in large chunks. Practitioners then have to decide how to allocate their computational budget in order to obtain the best performance at any point in time. Online learning theory for convex optimization suggests that the best strategy is to use data as soon as it arrives. However, this might not be the best strategy when using deep non-linear networks, particularly when these perform multiple passes over each chunk of data, rendering the overall distribution non-i.i.d. In this paper, we formalize this learning setting in the simplest scenario in which each data chunk is drawn from the same underlying distribution, and make a first attempt at empirically answering the following questions: How long should the learner wait before training on the newly arrived chunks? What architecture should the learner adopt? Should the learner increase capacity over time as more data is observed? We probe this learning setting using convolutional neural networks trained on classic computer vision benchmarks as well as a large transformer model trained on a large-scale language modeling task. Code is available in the supplementary material.},
pages = {165-182},
video = {https://youtu.be/UbaFAhVSFJw}
}
@InProceedings{marullo2022,
title = {Continual Unsupervised Learning for Optical Flow Estimation with Deep Networks},
author = {Marullo, Simone and Tiezzi, Matteo and Betti, Alessandro and Faggi, Lapo and Meloni, Enrico and Melacci, Stefano},
abstract = {In the last few years there has been a growing interest in approaches that allow neural networks to learn how to predict optical flow, both in a supervised and, more recently, unsupervised manner. While this clearly opens up the possibility of learning to estimate optical flow in a truly lifelong setting, by processing a potentially endless video stream, existing techniques assume to have access to large datasets and they perform stochastic mini-batch-based gradient optimization, paired with further ad-hoc components. We present an extensive study on how neural networks can learn to estimate optical flow in a continual manner while observing a long video stream and reacting online to the streamed information without any further data buffering. To this end, we rely on photo-realistic video streams that we specifically created using 3D virtual environments, as well as on a real-world movie. Our analysis considers important model selection issues that might be easily overlooked at a first glance, comparing different neural architectures and also state-of-the-art models pretrained in an offline manner. Our results not only show the feasibility of continual unsupervised learning in optical flow estimation, but also indicate that the learned models, in several situations, are comparable to state-of-the-art offline-pretrained networks. Moreover, we show how common issues in continual learning, such as catastrophic forgetting, do not affect the proposed models in a disruptive manner, given the task at hand.},
pages = {183-200},
video = {https://youtu.be/HYqt01wHazc}
}
@InProceedings{betti2022,
title = {Continual Learning through Hamilton Equations},
author = {Betti, Alessandro and Faggi, Lapo and Gori, Marco and Tiezzi, Matteo and Marullo, Simone and Meloni, Enrico and Melacci, Stefano},
abstract = {Learning in a continual manner is one of the main challenges that the machine learning community is currently facing. The importance of the problem can be readily understood as soon as we consider settings where an agent is supposed to learn through an online interaction with a data stream, rather than operating offline on previously prepared data collections. In the last few years many efforts have been spent in proposing both models and algorithms to let machines learn in a continual manner, and the problem still remains extremely challenging. Many of the existing works rely on re-adapting the usual learning framework inherited from classic statistical approaches, that are typical of non-continual-learning oriented problems. In this paper we consider a fully new perspective, rethinking the methodologies to be used to tackle continual learning, instead of re-adapting offline-oriented optimization. In particular, we propose a novel method to frame continual and online learning within the framework of optimal control. The proposed formulation leads to a novel interpretation of learning dynamics in terms of Hamilton equations. As a case study for the theory, we consider the problem of unsupervised optical flow estimation from a video stream. An experimental proof of concept for this learning task is discussed with the purpose of illustrating the soundness of the proposed approach, and opening to further research in this direction.},
pages = {201-212},
video = {https://youtu.be/3ybH-XmBS2I}
}
@InProceedings{guiroy2022,
title = {Improving Meta-Learning Generalization with Activation-Based Early-Stopping},
author = {Guiroy, Simon and Pal, Christopher and Mordido, Goncalo and Chandar, Sarath},
abstract = {Meta-learning algorithms for few-shot learning aim to train neural networks capable of generalizing to novel tasks using only a few examples. Early-stopping is critical for performance, halting model training when it reaches optimal generalization to the new task distribution. Early-stopping mechanisms in Meta-Learning typically rely on measuring the model performance on labeled examples from a meta-validation set drawn from the training (source) dataset. This is problematic in few-shot transfer learning settings, where the meta-test set comes from a different target dataset (OOD) and can potentially have a large distributional shift with the meta-validation set. In this work, we propose Activation Based Early-stopping (ABE), an alternative to using validation-based early-stopping for meta-learning. Specifically, we analyze the evolution, during meta-training, of the neural activations at each hidden layer, on a small set of unlabelled support examples from a single task of the target tasks distribution, as this constitutes minimal and justifiably accessible information from the target problem. Our experiments show that simple, label-agnostic statistics on the activations offer an effective way to estimate how the target generalization evolves over time. At each hidden layer, we characterize the activation distributions, from their first and second order moments, then further summarized along the feature dimensions, resulting in a compact yet intuitive characterization in a four-dimensional space. Detecting when, throughout training time, and at which layer, the target activation trajectory diverges from the activation trajectory of the source data, allows us to perform early-stopping and improve generalization in a large array of few-shot transfer learning settings, across different algorithms, source and target datasets.},
pages = {213-230},
video = {https://youtu.be/C4py5DYHRwA}
}
@InProceedings{shperberg2022,
title = {A Rule-based Shield: Accumulating Safety Rules from Catastrophic Action Effects},
author = {Shperberg, Shahaf S. and Liu, Bo and Allievi, Alessandro and Stone, Peter},
abstract = {Deploying autonomous agents in the real-world can lead to risks both to the agents and to the humans with whom they interact. As a result, it is essential for agents to try to achieve their objectives while acting as safely as possible. Thus, learning agents ought to learn not only about the effectiveness of actions, but also about their safety. While action effectiveness is task-dependent, information regarding the safety of actions can be preserved even if the task and/or the objective of the agent changes. The focus of this work is to leverage information from unsafe situations that the agent has experienced in order to obtain safety rules that identify which action from which state can lead to unsafe outcomes. These rules can be used for shielding the agent from repeating the same mistakes, as well as other mistakes that lead to the same catastrophic outcomes. In essence, before each action is selected for execution by the policy, actions which violate one of the safety rules from the current state are masked away and will not be selected. The cumulative set of safety rules can be used even when the agent faces multiple tasks, and can also be shared between different agents, so that mistakes that were made by one agent are not repeated by any of the agents that share the same rule-based shield. The process of learning a rule-based shield online is studied on a multi-task autonomous driving problem. Finally, the application of a rule-based shield to the Proximal Policy Optimization (PPO) algorithm is empirically evaluated and compared with the original PPO, with variants of PPO which use other online-learned shields, and with other baselines from the safe reinforcement learning literature. The results show that safety rules can significantly reduce the number of unsafe outcomes that agents experience, while even improving the cumulative rewards obtained by the agents.},
pages = {231-242},
video = {https://youtu.be/QhL2xDL5niY}
}
@InProceedings{liu2022,
title = {Continual Learning and Private Unlearning},
author = {Liu, Bo and Liu, Qiang and Stone, Peter},
abstract = {As intelligent agents become autonomous over longer periods of time, they may eventually become lifelong counterparts to specific people. If so, it may be common for a user to want the agent to master a task temporarily but later on to forget the task due to privacy concerns. However, enabling an agent to forget privately what the user specified without degrading the rest of the learned knowledge is a challenging problem. With the aim of addressing this challenge, this paper formalizes this continual learning and private unlearning (CLPU) problem. The paper further introduces a straightforward but exactly private solution, CLPU-DER++, as the first step towards solving the CLPU problem, along with a set of carefully designed benchmark problems to evaluate the effectiveness of the proposed solution.},
pages = {243-254},
video = {https://youtu.be/I0Pd_F9Us_c}
}
@InProceedings{beckham2022,
title = {Overcoming challenges in leveraging GANs for few-shot data augmentation},
author = {Beckham, Christopher and Laradji, Issam H. and Rodriguez, Pau and Vazquez, David and Nowrouzezahrai, Derek and Pal, Christopher},
abstract = {In this paper, we explore the use of GAN-based few-shot data augmentation as a method to improve few-shot classification performance. We perform an exploration into how a GAN can be fine-tuned for such a task (one of which is in a \emph{class-incremental} manner), as well as a rigorous empirical investigation into how well these models can perform to improve few-shot classification. We identify issues related to the difficulty of training and applying such generative models under a purely supervised regime with very few examples, as well as issues regarding the evaluation protocols of existing works. We also find that in this regime, classification accuracy is highly sensitive to how the classes of the dataset are randomly split. To address difficulties in applying these generative models under the few-shot regime, we propose a simple and pragmatic semi-supervised fine-tuning approach, and demonstrate gains in FID and precision-recall metrics as well as classification performance.},
pages = {255-280},
video = {https://youtu.be/AycdLmLoEnY}
}
@InProceedings{rostami2022,
title = {Increasing Model Generalizability for Unsupervised Visual Domain Adaptation},
author = {Rostami, Mohammad},
abstract = {A dominant approach for addressing unsupervised domain adaptation is to map data points for the source and the target domains into an embedding space which is modeled as the output-space of a shared deep encoder. The encoder is trained to make the embedding space domain-agnostic to make a source-trained classifier generalizable on the target domain. A secondary mechanism to improve UDA performance further is to make the source domain distribution more compact to improve model generalizability. We demonstrate that increasing the interclass margins in the embedding space can help to develop a UDA algorithm with improved performance. We estimate the internally learned multi-modal distribution for the source domain, learned as a result of pretraining, and use it to increase the interclass separation in the source domain to reduce the effect of domain shift. We demonstrate that using our approach leads to improved model generalizability on four standard benchmark UDA image classification datasets and compares favorably against existing methods.},
pages = {281-293},
video = {https://youtu.be/SjxNFO8rK0k}
}
@InProceedings{zhou2022,
title = {Forgetting and Imbalance in Robot Lifelong Learning with Off-policy Data},
author = {Zhou, Wenxuan and Bohez, Steven and Humplik, Jan and Heess, Nicolas and Abdolmaleki, Abbas and Rao, Dushyant and Wulfmeier, Markus and Haarnoja, Tuomas},
abstract = {Robots will experience non-stationary environment dynamics throughout their lifetime: the robot dynamics can change due to wear and tear, or its surroundings may change over time. Eventually, the robot should perform well in all of the environment variations it has encountered. At the same time, it should still be able to learn fast in a new environment. We identify two challenges in Reinforcement Learning (RL) under such a lifelong learning setting with off-policy data: first, existing off-policy algorithms struggle with the trade-off between being conservative to maintain good performance in the old environment and learning efficiently in the new environment, despite keeping all the data in the replay buffer. We propose the Offline Distillation Pipeline to break this trade-off by separating the training procedure into an online interaction phase and an offline distillation phase. Second, we find that training with the imbalanced off-policy data from multiple environments across the lifetime creates a significant performance drop. We identify that this performance drop is caused by the combination of the imbalanced quality and size among the datasets which exacerbates the extrapolation error of the Q-function. During the distillation phase, we apply a simple fix to the issue by keeping the policy closer to the behavior policy that generated the data. In the experiments, we demonstrate these two challenges and the proposed solutions with a simulated bipedal robot walking task across various environment changes. We show that the Offline Distillation Pipeline achieves better performance across all the encountered environments without affecting data collection. We also provide a comprehensive empirical study to support our hypothesis on the data imbalance issue.},
pages = {294-309},
video = {https://youtu.be/r81w48Bo1vM}
}
@InProceedings{schaeffer2022,
title = {Streaming Inference for Infinite Non-Stationary Clustering},
author = {Schaeffer, Rylan and Liu, Gabrielle Kaili-may and Du, Yilun and Linderman, Scott and Fiete, Ila R.},
abstract = {Learning from a continuous stream of non-stationary data in an unsupervised manner is arguably one of the most common and most challenging settings facing intelligent agents. Here, we attack learning under all three conditions (unsupervised, streaming, non-stationary) in the context of clustering, also known as mixture modeling. We introduce a novel clustering algorithm that endows mixture models with the ability to create new clusters online, as demanded by the data, in a probabilistic, time-varying, and principled manner. To achieve this, we first define a novel stochastic process called the Dynamical Chinese Restaurant Process (Dynamical CRP), which is a non-exchangeable distribution over partitions of a set; next, we show that the Dynamical CRP provides a non-stationary prior over cluster assignments and yields an efficient streaming variational inference algorithm. We conclude with experiments showing that the Dynamical CRP can be applied on diverse synthetic and real data with Gaussian and non-Gaussian likelihoods.},
pages = {310-326},
video = {https://youtu.be/IQT5PYQAoC4}
}
@InProceedings{fraser2022,
title = {Inexperienced RL Agents Can't Get It Right: Lower Bounds on Regret at Finite Sample Complexity},
author = {Fraser, Maia and L\'etourneau, Vincent},
abstract = {We consider a family $\mathcal M$ of MDPs over given state and action spaces, and an agent that is sequentially confronted with tasks from $\mathcal M$. Although stated for this stepwise change in distributions, the insight we develop is informative for continually changing distributions as well. In order to study how structure of $\mathcal M$, viewed as a learning environment, impacts the learning efficiency of the agent, we formulate an RL analog of fat shattering dimension for MDP families and show that this implies a nontrivial lower bound on regret as long as insufficiently many steps have been taken. More precisely, for some constant $c$ which depends on shattering $d$ states, an inexperienced agent that has explored the learning environment for fewer than $d$ steps will necessarily have regret above $c$ on some MDP in the family.},
pages = {327-334},
video = {https://youtu.be/dsx81HGUI_8}
}
@InProceedings{bereska2022,
title = {Continual Learning of Dynamical Systems With Competitive Federated Reservoir Computing},
author = {Bereska, Leonard and Gavves, Efstratios},
abstract = {Machine learning recently proved efficient in learning differential equations and dynamical systems from data. However, the data is commonly assumed to originate from a single never-changing system. In contrast, when modeling real-world dynamical processes, the data distribution often shifts due to changes in the underlying system dynamics. Continual learning of these processes aims to rapidly adapt to abrupt system changes without forgetting previous dynamical regimes. This work proposes an approach to continual learning based on reservoir computing, a state-of-the-art method for training recurrent neural networks on complex spatiotemporal dynamical systems. Reservoir computing fixes the recurrent network weights - hence these cannot be forgotten - and only updates linear projection heads to the output. We propose to train multiple competitive prediction heads concurrently. Inspired by neuroscience’s predictive coding, only the most predictive heads activate, laterally inhibiting and thus protecting the inactive heads from forgetting induced by interfering parameter updates. We show that this multi-head reservoir minimizes interference and catastrophic forgetting on several dynamical systems, including the Van-der-Pol oscillator, the chaotic Lorenz attractor, and the high-dimensional Lorenz-96 weather model. Our results suggest that reservoir computing is a promising candidate framework for the continual learning of dynamical systems. We provide our code for data generation, method, and comparisons at \url{https://github.com/leonardbereska/multiheadreservoir}.},
pages = {335-350},
video = {https://youtu.be/YfrRUyejhe4}
}
@InProceedings{akakzia2022,
title = {Learning Object-Centered Autotelic Behaviors with Graph Neural Networks},
author = {Akakzia, Ahmed and Sigaud, Olivier},
abstract = {Although humans live in an open-ended world and endlessly face new challenges, they do not have to learn from scratch each time they face the next one. Rather, they have access to a handful of previously learned skills, which they rapidly adapt to new situations. In artificial intelligence, autotelic agents—which are intrinsically motivated to represent and set their own goals—exhibit promising skill adaptation capabilities. However, these capabilities are highly constrained by their policy and goal space representations. In this paper, we propose to investigate the impact of these representations on the learning capabilities of autotelic agents. We study different implementations of autotelic agents using four types of Graph Neural Networks policy representations and two types of goal spaces, either geometric or predicate-based. We show that combining object-centered architectures that are expressive enough with semantic relational goals enables an efficient transfer between skills and promotes behavioral diversity. We also release our graph-based implementations to encourage further research in this direction.},
pages = {351-365},
video = {https://youtu.be/OOLnmw6aK30}
}
@InProceedings{malviya2022,
title = {TAG: Task-based Accumulated Gradients for Lifelong learning},
author = {Malviya, Pranshu and Ravindran, Balaraman and Chandar, Sarath},
abstract = {When an agent encounters a continual stream of new tasks in the lifelong learning setting, it leverages the knowledge it gained from the earlier tasks to help learn the new tasks better. In such a scenario, identifying an efficient knowledge representation becomes a challenging problem. Most research works propose to either store a subset of examples from the past tasks in a replay buffer, dedicate a separate set of parameters to each task or penalize excessive updates over parameters by introducing a regularization term. While existing methods employ the general task-agnostic stochastic gradient descent update rule, we propose a task-aware optimizer that adapts the learning rate based on the relatedness among tasks. We utilize the directions taken by the parameters during the updates by additively accumulating the gradients specific to each task. These task-based accumulated gradients act as a knowledge base that is maintained and updated throughout the stream. We empirically show that our proposed adaptive learning rate not only accounts for catastrophic forgetting but also exhibits knowledge transfer. We also show that our method performs better than several state-of-the-art methods in lifelong learning on complex datasets. Moreover, our method can also be combined with the existing methods and achieve substantial improvement in performance.},
pages = {366-389},
video = {https://youtu.be/T-oJDIFbn9Q}
}
@InProceedings{bhat2022a,
title = {Task Agnostic Representation Consolidation: a Self-supervised based Continual Learning Approach},
author = {Bhat, Prashant Shivaram and Zonooz, Bahram and Arani, Elahe},
abstract = {Continual learning (CL) over non-stationary data streams remains one of the long-standing challenges in deep neural networks (DNNs) as they are prone to catastrophic forgetting. CL models can benefit from self-supervised pre-training as it enables learning more generalizable task-agnostic features. However, the effect of self-supervised pre-training diminishes as the length of task sequences increases. Furthermore, the domain shift between pre-training data distribution and the task distribution reduces the generalizability of the learned representations. To address these limitations, we propose Task Agnostic Representation Consolidation (TARC), a novel two-stage training paradigm for CL that intertwines task-agnostic and task-specific learning whereby self-supervised training is followed by supervised learning for each task. To further restrict the deviation from the learned representations in the self-supervised stage, we employ a task-agnostic auxiliary loss during the supervised stage. We show that our training paradigm can be easily added to memory- or regularization-based approaches and provides consistent performance gain across more challenging CL settings. We further show that it leads to more robust and well-calibrated models.},
pages = {390-405},
video = {https://youtu.be/bZZrmxLo0bk}
}
@InProceedings{chan2022,
title = {Zipfian Environments for Reinforcement Learning},
author = {Chan, Stephanie C.Y. and Lampinen, Andrew Kyle and Richemond, Pierre Harvey and Hill, Felix},
abstract = {As humans and animals learn in the natural world, they encounter distributions of entities, situations and events that are far from uniform. Typically, a relatively small set of experiences are encountered frequently, while many important experiences occur only rarely. The highly-skewed, heavy-tailed nature of reality poses particular learning challenges that humans and animals have met by evolving specialised memory systems. By contrast, most popular RL environments and benchmarks involve approximately uniform variation of properties, objects, situations or tasks. How will RL algorithms perform in worlds (like ours) where the distribution of environment features is far less uniform? To explore this question, we develop three complementary RL environments where the agent's experience varies according to a Zipfian (discrete power law) distribution. These environments will be made available as an open source library. On these benchmarks, we find that standard Deep RL architectures and algorithms acquire useful knowledge of common situations and tasks, but fail to adequately learn about rarer ones. To understand this failure better, we explore how different aspects of current approaches may be adjusted to help improve performance on rare events, and show that the RL objective function, the agent's memory system and self-supervised learning objectives can all influence an agent's ability to learn from uncommon experiences. Together, these results show that learning robustly from skewed experience is a critical challenge for applying Deep RL methods beyond simulations or laboratories, and our Zipfian environments provide a basis for measuring future progress towards this goal.},
pages = {406-429},
video = {https://youtu.be/BLFWnP1Idp4}
}
@InProceedings{munagala2022,
title = {CLActive: Episodic Memories for Rapid Active Learning},
author = {Munagala, Sri Aurobindo and Subramanian, Sidhant and Karthik, Shyamgopal and Prabhu, Ameya and Namboodiri, Anoop},
abstract = {Active Learning aims to solve the problem of alleviating labelling costs for large-scale datasets by selecting a subset of data to effectively train on. Deep Active Learning (DAL) techniques typically involve repeated training of a model for sample acquisition over the entire subset of labelled data available in each round. This can be prohibitively expensive to run in real-world scenarios with large and constantly growing data. Some work has been done to address this -- notably, Selection-Via-Proxy (SVP) proposed the use of a separate, smaller proxy model for acquisition. We explore further optimizations to the standard DAL setup and propose CLActive: an optimization procedure that brings significant speedups which maintains a constant training time for the selection model across rounds and retains information from past rounds using Experience Replay. We demonstrate large improvements in total train-time compared to the fully-trained baselines and SVP. We achieve up to 89$\times$, 7$\times$, 61$\times$ speedups over the fully-trained baseline at 50\% of dataset collection in CIFAR, Imagenet and Amazon Review datasets, respectively, with little accuracy loss. We also show that CLActive is robust against catastrophic forgetting in a challenging class-incremental active-learning setting. Overall, we believe that CLActive can effectively enable rapid prototyping and deployment of deep AL algorithms in real-world use cases across a variety of settings.},
pages = {430-440},
video = {https://youtu.be/xR5mTgVtADc}
}
@InProceedings{steinparz2022,
title = {Reactive Exploration to Cope With Non-Stationarity in Lifelong Reinforcement Learning},
author = {Steinparz, Christian Alexander and Schmied, Thomas and Paischer, Fabian and Dinu, Marius-constantin and Patil, Vihang Prakash and Bitto-nemling, Angela and Eghbal-zadeh, Hamid and Hochreiter, Sepp},
abstract = {In lifelong learning an agent learns throughout its entire life without resets, in a constantly changing environment, as we humans do. Consequently, lifelong learning comes with a plethora of research problems such as continual domain shifts, which result in non-stationary rewards and environmental dynamics. These non-stationarities, however, are difficult to detect and cope with due to their continuous nature. Therefore, exploration strategies and learning methods are required that are capable of tracking the steady domain shifts, and adapting to them. We propose Reactive Exploration to track and react to continual domain shifts in lifelong reinforcement learning, and to update the policy correspondingly. To this end, we conduct experiments in order to investigate different exploration strategies. We empirically show that policy-gradient algorithms are better suited for lifelong learning, as they adapt more quickly to distribution shifts than Q-learning. Thereby, policy-gradient methods profit the most from Reactive Exploration and show good results in lifelong learning with continual domain shifts.},
pages = {441-469},
video = {https://youtu.be/pcY7V1nFsbU}
}
@InProceedings{schweighofer2022,
title = {A Dataset Perspective on Offline Reinforcement Learning},
author = {Schweighofer, Kajetan and Dinu, Marius-constantin and Radler, Andreas and Hofmarcher, Markus and Patil, Vihang Prakash and Bitto-nemling, Angela and Eghbal-zadeh, Hamid and Hochreiter, Sepp},
abstract = {The application of Reinforcement Learning (RL) in real-world environments can be expensive or risky due to sub-optimal policies during training. In Offline RL, this problem is avoided since interactions with an environment are prohibited. Policies are learned from a given dataset, which solely determines their performance. Despite this fact, how dataset characteristics influence Offline RL algorithms is still hardly investigated. The dataset characteristics are determined by the behavioral policy that samples this dataset. Therefore, we define characteristics of behavioral policies as exploratory for yielding high expected information in their interaction with the Markov Decision Process (MDP) and as exploitative for having high expected return. We implement two corresponding empirical measures for the datasets sampled by the behavioral policy in deterministic MDPs. The first empirical measure SACo is defined by the normalized unique state-action pairs and captures exploration. The second empirical measure TQ is defined by the normalized average trajectory return and captures exploitation. Empirical evaluations show the effectiveness of TQ and SACo. In large-scale experiments using our proposed measures, we show that the unconstrained off-policy Deep Q-Network family requires datasets with high SACo to find a good policy. Furthermore, experiments show that policy constraint algorithms perform well on datasets with high TQ and SACo. Finally, the experiments show that purely dataset-constrained Behavioral Cloning performs competitively to the best Offline RL algorithms for datasets with high TQ.},
pages = {470-517},
video = {https://youtu.be/gYOieDpvhew}
}
@InProceedings{plas2022,
title = {Predictive Learning Enables Neural Networks to Learn Complex Working Memory Tasks},
author = {Van der Plas, Thijs Lambik and Manohar, Sanjay G. and Vogels, Tim P.},
abstract = {Brains are thought to engage in predictive learning - learning to predict upcoming stimuli - to construct an internal model of their environment. This is especially notable for spatial navigation, as first described by Tolman's latent learning tasks. However, predictive learning has also been observed in sensory cortex, in settings unrelated to spatial navigation. Apart from normative frameworks such as active inference or efficient coding, what could be the utility of learning to predict the patterns of occurrence of correlated stimuli? Here we show that prediction, and thereby the construction of an internal model of sequential stimuli, can bootstrap the learning process of a working memory task in a recurrent neural network. We implemented predictive learning alongside working memory match-tasks, and networks emerged to solve the prediction task first by encoding information across time to predict upcoming stimuli, and then eavesdropped on this solution to solve the matching task. Eavesdropping was most beneficial when neural resources were limited. Hence, predictive learning acts as a general neural mechanism to learn to store sensory information that can later be essential for working memory tasks.},
pages = {518-531},
video = {https://youtu.be/0_Os3Ak7iEg}
}
@InProceedings{cisneros2022,
title = {Benchmarking Learning Efficiency in Deep Reservoir Computing},
author = {Cisneros, Hugo and Mikolov, Tomas and Sivic, Josef},
abstract = {It is common to evaluate the performance of a machine learning model by measuring its predictive power on a test dataset. This approach favors complicated models that can smoothly fit complex functions and generalize well from training data points. Although essential components of intelligence, speed and data efficiency of this learning process are rarely reported or compared between different candidate models. In this paper, we introduce a benchmark of increasingly difficult tasks together with a data efficiency metric to measure how quickly machine learning models learn from training data. We compare the learning speed of some established sequential supervised models, such as RNNs, LSTMs, or Transformers, with relatively less known alternative models based on reservoir computing. The proposed tasks require a wide range of computational primitives, such as memory or the ability to compute Boolean functions, to be effectively solved. Surprisingly, we observe that reservoir computing systems that rely on dynamically evolving feature maps learn faster than fully supervised methods trained with stochastic gradient optimization while achieving comparable accuracy scores. The code, benchmark, trained models, and results to reproduce our experiments are available at \url{https://github.com/hugcis/benchmark_learning_efficiency}.},
pages = {532-547},
video = {https://youtu.be/6GSpvqqV9XI}
}
@InProceedings{kim2022,
title = {A Multi-Head Model for Continual Learning via Out-of-Distribution Replay},
author = {Kim, Gyuhak and Liu, Bing and Ke, Zixuan},
abstract = {This paper studies class incremental learning (CIL) of continual learning (CL). Many approaches have been proposed to deal with catastrophic forgetting (CF) in CIL. Most methods incrementally construct a single classifier for all classes of all tasks in a single head network. To prevent CF, a popular approach is to memorize a small number of samples from previous tasks and replay them during training of the new task. However, this approach still suffers from serious CF as the parameters learned for previous tasks are updated or adjusted with only the limited number of saved samples in the memory. This paper proposes an entirely different approach that builds a separate classifier (head) for each task (called a multi-head model) using a transformer network, called MORE. Instead of using the saved samples in memory to update the network for previous tasks/classes in the existing approach, MORE leverages the saved samples to build a task specific classifier (adding a new classification head) without updating the network learned for previous tasks/classes. The model for the new task in MORE is trained to learn the classes of the task and also to detect samples that are not from the same data distribution (i.e., out-of-distribution (OOD)) of the task. This enables the classifier for the task to which the test instance belongs to produce a high score for the correct class and the classifiers of other tasks to produce low scores because the test instance is not from the data distributions of these classifiers. Experimental results show that MORE outperforms state-of-the-art baselines and is also naturally capable of performing OOD detection in the continual learning setting.},
pages = {548-563},
video = {https://youtu.be/LFo-AhsCG1M}
}
@InProceedings{rahman2022,
title = {On the Limitations of Continual Learning for Malware Classification},
author = {Rahman, Mohammad Saidur and Coull, Scott and Wright, Matthew},
abstract = {Malicious software (malware) classification offers a unique challenge for continual learning (CL) regimes due to the volume of new samples received on a daily basis and the evolution of malware to exploit new vulnerabilities. On a typical day, antivirus vendors receive hundreds of thousands of unique pieces of software, both malicious and benign, and over the course of the lifetime of a malware classifier, more than a billion samples can easily accumulate. Given the scale of the problem, sequential training using continual learning techniques could provide substantial benefits in reducing training and storage overhead. To date, however, there has been no exploration of CL applied to malware classification tasks. In this paper, we study 11 CL techniques applied to three malware tasks covering common incremental learning scenarios, including task, class, and domain incremental learning (IL). Specifically, using two realistic, large-scale malware datasets, we evaluate the performance of the CL methods on both binary malware classification (Domain-IL) and multi-class malware family classification (Task-IL and Class-IL) tasks. To our surprise, continual learning methods significantly underperformed naive {\em Joint} replay of the training data in nearly all settings -- in some cases reducing accuracy by more than 70 percentage points. A simple approach of selectively replaying 20\% of the stored data achieves better performance, with 50\% of the training time compared to {\em Joint} replay. Finally, we discuss potential reasons for the unexpectedly poor performance of the CL techniques, with the hope that it spurs further research on developing techniques that are more effective in the malware classification domain.},
pages = {564-582},
video = {https://youtu.be/EEQoz91wx-M}
}
@InProceedings{lechner2022,
title = {Inherent Limitations of Multi-Task Fair Representations},
author = {Lechner, Tosca and Ben-David, Shai},
abstract = {With the growing awareness to fairness in machine learning and the realization of the central role that data representation has in data processing tasks, there is an obvious interest in notions of fair data representations. The goal of such representations is that a model trained on data under the representation (e.g., a classifier) will be guaranteed to respect some fairness constraints, while still being expressive enough to model the task well. Such representations are useful when they can be fixed for training models on various different tasks and also when they serve as data filtering between the raw data (available to the representation designer) and potentially malicious agents that use the data under the representation to learn predictive models and make decisions. A long list of recent research papers strive to provide tools for achieving these goals. However, we prove that in most cases, such goals are inaccessible! Roughly stated, we prove that no representation can guarantee the fairness of classifiers for different tasks trained using it (while retaining the needed expressive powers). The reasons for this impossibility depend on the notion of fairness one aims to achieve. For the basic ground-truth-independent notion of Demographic (or Statistical) Parity, the obstacle is conceptual; a representation that guarantees such fairness inevitably depends on the marginal (unlabeled) distribution of the relevant instances, and in most cases that distribution changes from one task to another. For more refined notions of fairness, that depend on some ground truth classification, like Equalized Odds (requiring equality of error rates between groups), fairness cannot be guaranteed by a representation that does not take into account the task specific labeling rule with respect to which such fairness will be evaluated (even if the marginal data distribution is known a priori). Furthermore, for tasks sharing the same marginal distribution, we prove that except for trivial cases, no representation can guarantee Equalized Odds fairness for any two different tasks while enabling accurate label predictions for both.},
pages = {583-603},
video = {https://youtu.be/r_nrLxrPQrY}
}
@InProceedings{kearney2022,
title = {What Should I Know? Using Meta-Gradient Descent for Predictive Feature Discovery in a Single Stream of Experience},
author = {Kearney, Alex and Koop, Anna and G\"unther, Johannes and Pilarski, Patrick M.},
abstract = {In computational reinforcement learning, a growing body of work seeks to construct an agent's perception of the world through predictions of future sensations; predictions about environment observations are used as additional input features to enable better goal-directed decision-making. An open challenge in this line of work is determining from the infinitely many predictions that the agent could possibly make which predictions might best support decision-making. This challenge is especially apparent in continual learning problems where a single stream of experience is available to a singular agent. As a primary contribution, we introduce a meta-gradient descent process by which an agent learns 1) what predictions to make, 2) the estimates for its chosen predictions, and 3) how to use learned estimates to generate policies that maximize future reward---all during a single ongoing process of continual learning. In this manuscript we consider predictions expressed as General Value Functions: temporally extended estimates of the accumulation of a future signal. We demonstrate that through interaction with the environment an agent can independently select predictions that resolve partial-observability, resulting in performance similar to, or better than expertly chosen General Value Functions in two domains. By learning, rather than manually specifying these predictions, we enable the agent to identify useful predictions in a self-supervised manner, taking a step towards truly autonomous systems.},
pages = {604-616},
video = {https://youtu.be/YExYsYIsG_Y}
}
@InProceedings{abbasi2022,
title = {Sparsity and Heterogeneous Dropout for Continual Learning in the Null Space of Neural Activations},
author = {Abbasi, Ali and Nooralinejad, Parsa and Braverman, Vladimir and Pirsiavash, Hamed and Kolouri, Soheil},
abstract = {Continual/lifelong learning from a non-stationary input data stream is a cornerstone of intelligence. Despite their phenomenal performance in a wide variety of applications, deep neural networks are prone to forgetting their previously learned information upon learning new ones. This phenomenon is called ``catastrophic forgetting'' and is deeply rooted in the stability-plasticity dilemma. Overcoming catastrophic forgetting in deep neural networks has become an active field of research in recent years. In particular, gradient projection-based methods have recently shown exceptional performance at overcoming catastrophic forgetting. This paper proposes two biologically-inspired mechanisms based on sparsity and heterogeneous dropout that significantly increase a continual learner's performance over a long sequence of tasks. Our proposed approach builds on the Gradient Projection Memory (GPM) framework. We leverage k-winner activations in each layer of a neural network to enforce layer-wise sparse activations for each task, together with a between-task heterogeneous dropout that encourages the network to use non-overlapping activation patterns between different tasks. In addition, we introduce two new benchmarks for continual learning under distributional shift, namely Continual Swiss Roll and ImageNet SuperDog-40. Lastly, we provide an in-depth analysis of our proposed method and demonstrate a significant performance boost on various benchmark continual learning problems.},
pages = {617-628},
video = {https://youtu.be/w9zgIj1dpso}
}
@InProceedings{ji2022,
title = {Test Sample Accuracy Scales with Training Sample Density in Neural Networks},
author = {Ji, Xu and Pascanu, Razvan and Hjelm, R. Devon and Lakshminarayanan, Balaji and Vedaldi, Andrea},
abstract = {Intuitively, one would expect accuracy of a trained neural network's prediction on test samples to correlate with how densely the samples are surrounded by seen training samples in representation space. We find that a bound on empirical training error smoothed across linear activation regions scales inversely with training sample density in representation space. Empirically, we verify this bound is a strong predictor of the inaccuracy of the network's prediction on test samples. For unseen test sets, including those with out-of-distribution samples, ranking test samples by their local region's error bound and discarding samples with the highest bounds raises prediction accuracy by up to 20\% in absolute terms for image classification datasets, on average over thresholds.},
pages = {629-646},
video = {https://youtu.be/GF_-LcdudXI}
}
@InProceedings{prado2022,
title = {A Theory for Knowledge Transfer in Continual Learning},
author = {Prado, Diana Benavides and Riddle, Patricia},
abstract = {Continual learning of a stream of tasks is an active area of research in deep neural networks. The main challenge investigated has been the phenomenon of catastrophic forgetting or interference of newly acquired knowledge with knowledge from previous tasks. Recent work has investigated forward knowledge transfer to new tasks. Backward transfer for improving knowledge gained during previous tasks has received much less attention. There is in general limited understanding of how knowledge transfer could aid tasks learned continually. We present a theory for knowledge transfer in continual supervised learning, which considers both forward and backward transfer. We aim to understand their impact on increasingly knowledgeable learners. We derive error bounds for each of these transfer mechanisms. These bounds are agnostic to specific implementations (e.g. deep neural networks). We demonstrate that, for a continual learner that observes related tasks, both forward and backward transfer can contribute to increasing performance as more tasks are observed.},
pages = {647-660},
video = {https://youtu.be/rs119ZMh7_0}
}
@InProceedings{corrado2022,
title = {Simulation-Acquired Latent Action Spaces for Dynamics Generalization},
author = {Corrado, Nicholas and Qu, Yuxiao and Hanna, Josiah P.},
abstract = {Deep reinforcement learning has shown incredible promise at training high-performing agents to solve high-dimensional continuous control tasks in a particular training environment. However, to be useful in real-world settings, long-lived agents must perform well across a range of environmental conditions. Naively applying deep RL to a task where environment conditions may vary from episode to episode can be data inefficient. To address this inefficiency, we introduce a method that discovers structure in an agent's high-dimensional continuous action space to speed up learning across a range of environmental conditions. Whereas prior work on finding so-called latent action spaces requires expert demonstrations or on-task experience, we instead propose to discover the latent, lower-dimensional action space in a simulated source environment and then transfer the learned action space for training in the target environment. We evaluate our novel method on randomized variants of simulated MuJoCo environments and find that, when there is a lower-dimensional action-space to exploit, our method significantly increases data efficiency. For instance, in the Ant environment, our method reduces the 8-dimensional action-space to a 3-dimensional action-space and doubles the average return achieved after a training budget of 2 million timesteps. },
pages = {661-682},
video = {https://youtu.be/HMdjfxP7BRo}
}
@InProceedings{powers2022a,
title = {Self-Activating Neural Ensembles for Continual Reinforcement Learning},
author = {Powers, Sam and Xing, Eliot and Gupta, Abhinav},
abstract = {The ability for an agent to continuously learn new skills without catastrophically forgetting existing knowledge is of critical importance for the development of generally intelligent agents. Most methods devised to address this problem depend heavily on well-defined task boundaries, and thus depend on human supervision. Our task-agnostic method, Self-Activating Neural Ensembles (SANE), uses a modular architecture designed to avoid catastrophic forgetting without making any such assumptions. At the beginning of each trajectory, a module in the SANE ensemble is activated to determine the agent’s next policy. During training, new modules are created as needed and only activated modules are updated to ensure that unused modules remain unchanged. This system enables our method to retain and leverage old skills, while growing and learning new ones. We demonstrate our approach on visually rich procedurally generated environments.},
pages = {683-704},
video = {https://youtu.be/hWtiGrrocbs}
}
@InProceedings{powers2022b,
title = {CORA: Benchmarks, Baselines, and Metrics as a Platform for Continual Reinforcement Learning Agents},
author = {Powers, Sam and Xing, Eliot and Kolve, Eric and Mottaghi, Roozbeh and Gupta, Abhinav},
abstract = {Progress in continual reinforcement learning has been limited due to several barriers to entry: missing code, high compute requirements, and a lack of suitable benchmarks. In this work, we present CORA, a platform for Continual Reinforcement Learning Agents that provides benchmarks, baselines, and metrics in a single code package. The benchmarks we provide are designed to evaluate different aspects of the continual RL challenge, such as catastrophic forgetting, plasticity, ability to generalize, and sample-efficient learning. Three of the benchmarks utilize video game environments (Atari, Procgen, NetHack). The fourth benchmark, CHORES, consists of four different task sequences in a visually realistic home simulator, drawn from a diverse set of task and scene parameters. To compare continual RL methods on these benchmarks, we prepare three metrics in CORA: Continual Evaluation, Isolated Forgetting, and Zero-Shot Forward Transfer. Finally, CORA includes a set of performant, open-source baselines of existing algorithms for researchers to use and expand on. We release CORA and hope that the continual RL community can benefit from our contributions, to accelerate the development of new continual RL algorithms.},
pages = {705-743},
video = {https://youtu.be/R3BkNbPIUL8}
}
@InProceedings{hayes2022,
title = {Online Continual Learning for Embedded Devices},
author = {Hayes, Tyler L. and Kanan, Christopher},
abstract = {Real-time on-device continual learning is needed for new applications such as home robots, user personalization on smartphones, and augmented/virtual reality headsets. However, this setting poses unique challenges: embedded devices have limited memory and compute capacity and conventional machine learning models suffer from catastrophic forgetting when updated on non-stationary data streams. While several online continual learning models have been developed, their effectiveness for embedded applications has not been rigorously studied. In this paper, we first identify criteria that online continual learners must meet to effectively perform real-time, on-device learning. We then study the efficacy of several online continual learning methods when used with mobile neural networks. We measure their performance, memory usage, compute requirements, and ability to generalize to out-of-domain inputs.},
pages = {744-766},
video = {https://youtu.be/q-FlrczywT4}
}
@InProceedings{wang2022,
title = {Heat-RL: Online Model Selection for Streaming Time-Series Anomaly Detection},
author = {Wang, Yujing and Xiong, Luoxin and Zhang, Mingliang and Xue, Hui and Chen, Qi and Yang, Yaming and Tong, Yunhai and Huang, Congrui and Xu, Bixiong},
abstract = {Time-series anomaly detection plays an important role in various applications. In a commercial system, anomaly detection models are either unsupervised or pre-trained in a self-supervised manner offline; while in the online serving stage, an appropriate model should be selected to fulfill each customer’s requirement with only a few human interactions. Existing online model selection methods are not data efficient, failing to achieve good performance with only a limited amount of manual feedback. In this paper, we propose Heat-RL, a novel reinforcement learning algorithm tailored to online model selection for streaming time-series data. Specifically, we design a new state based on metric-oriented heatmaps and apply ResNet for policy and value networks to capture the correlations among similar model configurations. Experiments demonstrate the effectiveness of Heat-RL on both academic and industrial datasets. On all datasets, the average F1 and last F1 scores have been improved by 5.5\% and 14.6\%, respectively, compared to the best state-of-the-art solution.},
pages = {767-777},
video = {https://youtu.be/yinN3cUeZHk}
}
@InProceedings{lai2022,
title = {Lifelong DP: Consistently Bounded Differential Privacy in Lifelong Machine Learning},
author = {Lai, Phung and Hu, Han and Phan, Hai and Jin, Ruoming and Thai, My and Chen, An},
abstract = {In this paper, we show that the process of continually learning new tasks and memorizing previous tasks introduces unknown privacy risks and challenges to bound the privacy loss. Based upon this, we introduce a formal definition of Lifelong DP, in which the participation of any data tuples in the training set of any tasks is protected, under a consistently bounded DP protection, given a growing stream of tasks. A consistently bounded DP means having only one fixed value of the DP privacy budget, regardless of the number of tasks. To preserve Lifelong DP, we propose a scalable and heterogeneous algorithm, called L2DP-ML with a streaming batch training, to efficiently train and continue releasing new versions of an L2M model, given the heterogeneity in terms of data sizes and the training order of tasks, without affecting DP protection of the private training set. An end-to-end theoretical analysis and thorough evaluations show that our mechanism is significantly better than baseline approaches in preserving Lifelong DP. The implementation of L2DP-ML is available at: https://github.com/haiphanNJIT/PrivateDeepLearning.},
pages = {778-797},
video = {https://youtu.be/Odhjk8dCqGs}
}
@InProceedings{guillet2022,
title = {Neural Distillation as a State Representation Bottleneck in Reinforcement Learning},
author = {Guillet, Valentin and Wilson, Dennis George and Rachelson, Emmanuel},
abstract = {Learning a good state representation is a critical skill when dealing with multiple tasks in Reinforcement Learning as it allows for transfer and better generalization between tasks. However, defining what constitutes a useful representation is far from simple, and there is so far no standard method to find such an encoding. In this paper, we argue that distillation --- a process that aims at imitating a set of given policies with a single neural network --- can be used to learn a state representation displaying favorable characteristics. In this regard, we define three criteria that measure desirable features of a state encoding: the ability to select important variables in the input space, the ability to efficiently separate states according to their corresponding optimal action, and the robustness of the state encoding on new tasks. We first evaluate these criteria and verify the contribution of distillation on state representation on a toy environment based on the standard inverted pendulum problem, before extending our analysis to more complex visual tasks from the Atari and Procgen benchmarks.},
pages = {798-818},
video = {https://youtu.be/d93Aj_5y8bs}
}
@InProceedings{lubana2022,
title = {How do Quadratic Regularizers Prevent Catastrophic Forgetting: The Role of Interpolation},
author = {Lubana, Ekdeep Singh and Trivedi, Puja and Koutra, Danai and Dick, Robert},
abstract = {Catastrophic forgetting undermines the effectiveness of deep neural networks (DNNs) in scenarios such as continual learning and lifelong learning. While several methods have been proposed to tackle this problem, there is limited work explaining why these methods work well. This paper aims to better explain a popular technique for avoiding catastrophic forgetting: quadratic regularization. We show that quadratic regularizers prevent forgetting of past tasks by interpolating current and previous values of model parameters at every training iteration. Over multiple training iterations, this interpolation operation reduces the learning rates of more important model parameters, thereby minimizing their movement. Our analysis also reveals two drawbacks of quadratic regularization: (a) dependence of parameter interpolation on training hyperparameters, which often leads to training instability and (b) assignment of lower importance to deeper layers, which are generally where forgetting occurs in DNNs. Via a simple modification to the order of operations, we show these drawbacks can be easily avoided, resulting in 6.2\% higher average accuracy at 4.5\% lower average forgetting. We confirm the robustness of our results by training over 2000 models in different settings.},
pages = {819-837},
video = {https://youtu.be/gd5YzEnULHU}
}
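% The central claim of lubana2022 is that a quadratic regularizer acts as a per-parameter
% interpolation between current parameters and the previous-task solution. A small numpy
% sketch of that equivalence, assuming an EWC-style penalty with anchor theta_star and
% importance weights omega (these names are ours, not the paper's):
%
%   import numpy as np
%
%   def penalized_sgd_step(theta, grad, theta_star, omega, lr=0.1, lam=1.0):
%       # one SGD step on loss + (lam / 2) * omega * (theta - theta_star)^2
%       return theta - lr * (grad + lam * omega * (theta - theta_star))
%
%   def interpolation_view(theta, grad, theta_star, omega, lr=0.1, lam=1.0):
%       # the same update rearranged: parameters with large omega move less
%       alpha = lr * lam * omega
%       return (1.0 - alpha) * theta + alpha * theta_star - lr * grad
%
%   theta = np.array([1.0, -2.0]); grad = np.array([0.5, 0.5])
%   theta_star = np.zeros(2); omega = np.array([0.1, 5.0])
%   assert np.allclose(penalized_sgd_step(theta, grad, theta_star, omega),
%                      interpolation_view(theta, grad, theta_star, omega))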
@InProceedings{xie2022,
title = {Lifelong Robotic Reinforcement Learning by Retaining Experiences},
author = {Xie, Annie and Finn, Chelsea},
abstract = {Multi-task learning ideally allows embodied agents such as robots to acquire a diverse repertoire of useful skills. However, many multi-task reinforcement learning efforts assume the agent can collect data from all tasks at all times, which can be unrealistic for physical agents that can only attend to one task at a time. Motivated by the practical constraints of physical learning systems, this work studies lifelong learning as a more natural multi-task learning setup. We present an approach that effectively leverages data collected from previous tasks to cumulatively and efficiently grow the robot's skill-set. In a series of simulated robotic manipulation experiments, our approach requires less than half as many samples as learning each task from scratch, while avoiding the impractical round-robin data collection scheme. On a Franka Emika Panda robot arm, our approach incrementally solves ten challenging tasks, including bottle capping and block insertion.},
pages = {838-855},
video = {https://youtu.be/ZkoRIaPm8jo}
}
@InProceedings{matthews2022,
title = {Hierarchical Kickstarting for Skill Transfer in Reinforcement Learning},
author = {Matthews, Michael and Samvelyan, Mikayel and Parker-holder, Jack and Grefenstette, Edward and Rockt\"{a}schel, Tim},
abstract = {Practising and honing skills forms a fundamental component of how humans learn, yet artificial agents are rarely specifically trained to perform them. Instead, they are usually trained end-to-end, with the hope that useful skills will be implicitly learned in order to maximise the discounted return of some extrinsic reward function. In this paper, we investigate how skills can be incorporated into the training of reinforcement learning (RL) agents in complex environments with large state-action spaces and sparse rewards. To this end, we created SkillHack, a benchmark of tasks and associated skills based on the game of NetHack. We evaluate a number of baselines on this benchmark, as well as our own novel skill-based method Hierarchical Kickstarting (HKS), which is shown to outperform all other evaluated methods. Our experiments show that learning with prior knowledge of useful skills can significantly improve the performance of agents on complex problems. We ultimately argue that utilising predefined skills provides a useful inductive bias for RL problems, especially those with large state-action spaces and sparse rewards.},
pages = {856-874},
video = {https://youtu.be/xDwIuD3-sOY}
}
@InProceedings{esmaeilpour2022,
title = {Open Set Recognition Via Augmentation-Based Similarity Learning},
author = {Esmaeilpour, Sepideh and Shu, Lei and Liu, Bing},
abstract = {The primary assumption of conventional supervised learning or classification is that the test samples are drawn from the same distribution as the training samples, which is called closed set learning or classification. In many practical scenarios, this is not the case because there are unknowns or unseen class samples in the test data, which is called the open set scenario, and the unknowns need to be detected. This problem is referred to as the open set recognition problem and is important in safety-critical applications. We propose to detect unknowns (or unseen class samples) through learning pairwise similarities. The proposed method works in two steps. It first learns a closed set classifier using the seen classes that have appeared in training and then learns how to compare seen classes with pseudo-unseen ones (automatically generated unseen class samples). The pseudo-unseen generation is carried out by performing distribution-shifting augmentations on the seen or training samples. We call our method OPG (Open set recognition based on Pseudo unseen data Generation). The experimental evaluation shows that the learned similarity-based features can successfully distinguish seen from unseen samples in benchmark datasets for open set recognition.},
pages = {875-885},
video = {https://youtu.be/ydBooMDtmtw}
}
@InProceedings{luketina2022,
title = {Meta-Gradients in Non-Stationary Environments},
author = {Luketina, Jelena and Flennerhag, Sebastian and Schroecker, Yannick and Abel, David and Zahavy, Tom and Singh, Satinder},
abstract = {Meta-gradient methods (Xu et al., 2018; Zahavy et al., 2020) offer a promising solution to the problem of hyperparameter selection and adaptation in non-stationary reinforcement learning problems. However, the properties of meta-gradients in such environments have not been systematically studied. In this work, we bring new clarity to meta-gradients in non-stationary environments. Concretely, we ask: (i) how much information should be given to the learned optimizers, so as to enable faster adaptation and generalization over a lifetime, (ii) what meta-optimizer functions are learned in this process, and (iii) whether meta-gradient methods provide a bigger advantage in highly non-stationary environments. To study the effect of information provided to the meta-optimizer, as in recent works (Flennerhag et al., 2021; Almeida et al., 2021), we replace the tuned meta-parameters of fixed update rules with learned meta-parameter functions of selected context features. The context features carry information about agent performance and changes in the environment and hence can inform learned meta-parameter schedules. We find that adding more contextual information is generally beneficial, leading to faster adaptation of meta-parameter values and increased performance over a lifetime. We support these results with a qualitative analysis of resulting meta-parameter schedules and learned functions of context features. Lastly, we find that without context, meta-gradients do not provide a consistent advantage over the baseline in highly non-stationary environments. Our findings suggest that contextualizing meta-gradients can play a pivotal role in extracting high performance from meta-gradients in non-stationary settings.},
pages = {886-901},
video = {https://youtu.be/z8mfGaB8MC8}
}
@InProceedings{salter2022,
title = {MO2: Model-Based Offline Options},
author = {Salter, Sasha and Wulfmeier, Markus and Tirumala, Dhruva and Heess, Nicolas and Riedmiller, Martin and Hadsell, Raia and Rao, Dushyant},
abstract = {The ability to discover useful behaviours from past experience and transfer them to new tasks is considered a core component of natural embodied intelligence. Inspired by neuroscience, discovering behaviours that switch at bottleneck states has long been sought after for inducing plans of minimum description length across tasks. Prior approaches have either supported only online, on-policy bottleneck state discovery, limiting sample efficiency, or only discrete state-action domains, restricting applicability. To address this, we introduce Model-Based Offline Options (MO2), an offline hindsight framework supporting sample-efficient bottleneck option discovery over continuous state-action spaces. Once bottleneck options are learnt offline over source domains, they are transferred online to improve exploration and value estimation on the transfer domain. Our experiments show that on complex long-horizon continuous control tasks with sparse, delayed rewards, MO2’s properties are essential and lead to performance exceeding recent option learning methods. Additional ablations further demonstrate the impact on option predictability and credit assignment.},
pages = {902-919},
video = {https://youtu.be/NpMg_YqE7qw}
}
@InProceedings{sarfraz2022,
title = {SYNERgy between SYNaptic Consolidation and Experience Replay for General Continual Learning},
author = {Sarfraz, Fahad and Arani, Elahe and Zonooz, Bahram},
abstract = {Continual learning (CL) in the brain is facilitated by a complex set of mechanisms. This includes the interplay of multiple memory systems for consolidating information as posited by the complementary learning systems (CLS) theory and synaptic consolidation for protecting the acquired knowledge from erasure. Therefore, we propose a general CL method that creates a synergy between SYNaptic consolidation and dual memory Experience Replay (SYNERgy). Our method maintains a semantic memory that accumulates and consolidates information across the tasks and interacts with the episodic memory for effective replay. It further employs synaptic consolidation by tracking the importance of parameters during the training trajectory and anchoring them to the consolidated parameters in the semantic memory. To the best of our knowledge, our study is the first to employ dual memory experience replay and synaptic consolidation in conjunction, which is suitable for general CL whereby the network does not utilize task boundaries or task labels during training or inference. Our extensive evaluation on various challenging CL scenarios, along with characteristics analyses, demonstrates the efficacy of incorporating both synaptic consolidation and CLS theory in enabling effective CL in DNNs.},
pages = {920-936},
video = {https://youtu.be/wS2m0YFO7l4}
}
@InProceedings{gurulingan2022,
title = {Curbing Task Interference using Representation Similarity-Guided Multi-Task Feature Sharing},
author = {Gurulingan, Naresh Kumar and Arani, Elahe and Zonooz, Bahram},
abstract = {Multi-task learning of dense prediction tasks, by sharing both the encoder and decoder, as opposed to sharing only the encoder, provides an attractive avenue to increase both accuracy and computational efficiency. When the tasks are similar, sharing the decoder serves as an additional inductive bias providing more room for tasks to share complementary information among themselves. However, increased sharing exposes more parameters to task interference which likely hinders both generalization and robustness. Effective ways to curb this interference while exploiting the inductive bias of sharing the decoder remain an open challenge. To address this challenge, we propose Progressive Decoder Fusion (PDF) to progressively combine task decoders based on inter-task representation similarity. We show that this procedure leads to a multi-task network with better generalization to in-distribution and out-of-distribution data and improved robustness to adversarial attacks. Additionally, we observe that the predictions of different tasks of this multi-task network are more consistent with each other.},
pages = {937-951},
video = {https://youtu.be/w1Ey-VJoVqc}
}
@InProceedings{ramkumar2022,
title = {Differencing-based Self-supervised Pretraining for Scene Change Detection},
author = {Ramkumar, Vijaya Raghavan T. and Arani, Elahe and Zonooz, Bahram},
abstract = {Scene change detection (SCD), a crucial perception task, identifies changes by comparing scenes captured at different times. SCD is challenging due to noisy changes in illumination, seasonal variations, and perspective differences across a pair of views. Deep neural networks (DNNs)-based solutions require a large quantity of annotated data which is tedious and expensive to obtain. On the other hand, transfer learning from large datasets induces domain shift. To address these challenges, we propose a novel Differencing Self-supervised Pretraining (DSP) method that uses feature differencing to learn discriminatory representations corresponding to the changed regions while simultaneously tackling the noisy changes by enforcing temporal invariance across views. Our experimental results on SCD datasets demonstrate the effectiveness of our method, specifically to differences in camera viewpoints and lighting conditions. Compared against the standard ImageNet pretraining that uses more than a million additional labeled images, DSP can surpass it without using any additional data. Our results also demonstrate the robustness of DSP to natural corruptions, distribution shift, and learning under limited labeled data.},
pages = {952-965},
video = {https://youtu.be/53YVk9Lsa5A}
}
@InProceedings{fatras2022,
title = {Optimal Transport meets Noisy Label Robust Loss and MixUp Regularization for Domain Adaptation},
author = {Fatras, Kilian and Naganuma, Hiroki and Mitliagkas, Ioannis},
abstract = {It is common in computer vision to be confronted with domain shift: images which have the same class but different acquisition conditions. In domain adaptation (DA), one wants to classify unlabeled target images using source labeled images. Unfortunately, deep neural networks trained on a source training set perform poorly on target images which do not belong to the training domain. One strategy to improve these performances is to align the source and target image distributions in an embedded space using optimal transport (OT). To compute OT, most methods use the minibatch optimal transport approximation which causes negative transfer, i.e. aligning samples with different labels, and leads to overfitting. In this work, we mitigate negative alignment by explaining it as a noisy label assignment to target images. We then mitigate its effect by appropriate regularization. We propose to couple the MixUp regularization with a loss that is robust to noisy labels in order to improve domain adaptation performance. We show in an extensive ablation study that a combination of the two techniques is critical to achieve improved performance. Finally, we evaluate our method, called mixunbot, on several benchmarks and real-world DA problems.},
pages = {966-981},
video = {https://youtu.be/c_9ozkPdxqw}
}
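% fatras2022 combines minibatch optimal transport with a noise-robust loss and MixUp.
% The numpy sketch below shows those ingredients in isolation (entropic OT coupling, label
% transport, generalized cross-entropy, MixUp); it is a schematic reading of the abstract,
% not the authors' mixunbot code, and all function names are ours.
%
%   import numpy as np
%
%   def sinkhorn_coupling(C, reg=0.1, n_iter=200):
%       # entropic OT plan between two uniform minibatches, given a cost matrix C (n_s x n_t)
%       a = np.full(C.shape[0], 1.0 / C.shape[0]); b = np.full(C.shape[1], 1.0 / C.shape[1])
%       K = np.exp(-C / reg); v = np.ones_like(b)
%       for _ in range(n_iter):
%           u = a / (K @ v); v = b / (K.T @ u)
%       return u[:, None] * K * v[None, :]
%
%   def transported_soft_labels(plan, y_source_onehot):
%       # labels carried to target samples through the plan; possibly noisy assignments
%       return (plan / plan.sum(axis=0, keepdims=True)).T @ y_source_onehot
%
%   def gce_loss(probs, soft_labels, q=0.7):
%       # generalized cross-entropy, robust to noisy label assignments
%       p = np.clip((probs * soft_labels).sum(axis=1), 1e-8, 1.0)
%       return np.mean((1.0 - p ** q) / q)
%
%   def mixup(x1, x2, alpha=0.2):
%       lam = np.random.beta(alpha, alpha)
%       return lam * x1 + (1.0 - lam) * x2, lam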
@InProceedings{mendez2022,
title = {CompoSuite: A Compositional Reinforcement Learning Benchmark},
author = {Mendez, Jorge A. and Hussing, Marcel and Gummadi, Meghna and Eaton, Eric},
abstract = {We present CompoSuite, an open-source simulated robotic manipulation benchmark for compositional multi-task reinforcement learning (RL). Each CompoSuite task requires a particular robot arm to manipulate one individual object to achieve a task objective while avoiding an obstacle. This compositional definition of the tasks endows CompoSuite with two remarkable properties. First, varying the robot/object/objective/obstacle elements leads to hundreds of RL tasks, each of which requires a meaningfully different behavior. Second, RL approaches can be evaluated specifically for their ability to learn the compositional structure of the tasks. This latter capability to functionally decompose problems would enable intelligent agents to identify and exploit commonalities between learning tasks to handle large varieties of highly diverse problems. We benchmark existing single-task, multi-task, and compositional learning algorithms on various training settings, and assess their capability to compositionally generalize to unseen tasks. Our evaluation exposes the shortcomings of existing RL approaches with respect to compositionality and opens new avenues for investigation.},
pages = {982-1003},
video = {https://youtu.be/AxmPS9xAgos}
}
@InProceedings{aljundi2022,
title = {Continual Novelty Detection},
author = {Aljundi, Rahaf and Reino, Daniel Olmeda and Chumerin, Nikolay and Turner, Richard E.},
abstract = {Novelty Detection methods identify samples that are not representative of a model's training set thereby flagging misleading predictions and bringing a greater flexibility and transparency at deployment time. However, research in this area has only considered Novelty Detection in the offline setting. Recently, there has been a growing realization in the computer vision community that applications demand a more flexible framework - Continual Learning - where new batches of data representing new domains, new classes or new tasks become available at different points in time. In this setting, Novelty Detection becomes more important, interesting and challenging. This work identifies the crucial link between the two problems and investigates the Novelty Detection problem under the Continual Learning setting. We formulate the Continual Novelty Detection problem and present a benchmark, where we compare several Novelty Detection methods under different Continual Learning settings. We show that Continual Learning affects the behaviour of novelty detection algorithms, while novelty detection can pinpoint insights in the behaviour of a continual learner. We further propose baselines and discuss possible research directions. We believe that the coupling of the two problems is a promising direction to bring vision models into practice.},
pages = {1004-1025},
video = {https://youtu.be/XAu4I81UiX4}
}
@InProceedings{gowda2022,
title = {InBiaseD: Inductive Bias Distillation to Improve Generalization and Robustness through Shape-awareness},
author = {Gowda, Shruthi and Zonooz, Bahram and Arani, Elahe},
abstract = {Compared to deep neural networks, humans rely less on spurious correlations and trivial cues, such as texture, which leads to better generalization and robustness. This can be attributed to the prior knowledge or the high-level cognitive inductive bias present in the brain. Therefore, introducing meaningful inductive bias to neural networks can help learn more generic and high-level representations and alleviate some of the shortcomings. We propose InBiaseD to distill inductive bias and bring shape-awareness to the neural networks. Our method includes a bias alignment objective that enforces the networks to learn more generic representations that are less vulnerable to unintended cues in the data, which results in improved generalization performance. InBiaseD is less susceptible to shortcut learning and also exhibits lower texture bias. The better representations also aid in improving robustness to adversarial attacks and we hence plug InBiaseD seamlessly into the existing adversarial training schemes to show a better trade-off between generalization and robustness.},
pages = {1026-1042},
video = {https://youtu.be/YFj6Vmc8FGE}
}
@InProceedings{gauch2022,
title = {Few-Shot Learning by Dimensionality Reduction in Gradient Space},
author = {Gauch, Martin and Beck, Maximilian and Adler, Thomas and Kotsur, Dmytro and Fiel, Stefan and Eghbal-zadeh, Hamid and Brandstetter, Johannes and Kofler, Johannes and Holzleitner, Markus and Zellinger, Werner and Klotz, Daniel and Hochreiter, Sepp and Lehner, Sebastian},
abstract = {We introduce SubGD, a novel few-shot learning method which is based on the recent finding that stochastic gradient descent updates tend to live in a low-dimensional parameter subspace. In experimental and theoretical analyses, we show that models confined to a suitable predefined subspace generalize well for few-shot learning. A suitable subspace fulfills three criteria across the given tasks: it (a) allows the training error to be reduced by gradient flow, (b) leads to models that generalize well, and (c) can be identified by stochastic gradient descent. SubGD identifies these subspaces from an eigendecomposition of the auto-correlation matrix of update directions across different tasks. Demonstrably, we can identify suitable low-dimensional subspaces for few-shot learning of dynamical systems, which have varying properties described by one or a few parameters of the analytical system description. Such systems are ubiquitous among real-world applications in science and engineering. We experimentally corroborate the advantages of SubGD on three distinct dynamical systems problem settings, significantly outperforming popular few-shot learning methods both in terms of sample efficiency and performance.},
pages = {1043-1064},
video = {https://youtu.be/WCPIqdC7Nts}
}
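% gauch2022 identifies the few-shot fine-tuning subspace from an eigendecomposition of the
% auto-correlation matrix of update directions gathered across tasks, and then restricts
% gradients to that subspace. A small numpy illustration of those two steps (the naive
% full-matrix form shown here is only practical for small parameter counts; it is not the
% authors' code):
%
%   import numpy as np
%
%   def learn_subspace(update_dirs, k):
%       # update_dirs: (n_tasks, n_params) parameter updates from per-task training
%       C = update_dirs.T @ update_dirs / len(update_dirs)     # auto-correlation matrix
%       eigval, eigvec = np.linalg.eigh(C)
%       return eigvec[:, np.argsort(eigval)[::-1][:k]]         # top-k directions, (n_params, k)
%
%   def project_gradient(grad, basis):
%       # confine a few-shot fine-tuning gradient to the learned subspace
%       return basis @ (basis.T @ grad)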
@InProceedings{gummadi2022,
title = {SHELS: Exclusive Feature Sets for Novelty Detection and Continual Learning Without Class Boundaries},
author = {Gummadi, Meghna and Kent, David and Mendez, Jorge A. and Eaton, Eric},
abstract = {While deep neural networks (DNNs) have achieved impressive classification performance in closed-world learning scenarios, they typically fail to generalize to unseen categories in dynamic open-world environments, in which the number of concepts is unbounded. In contrast, human and animal learners have the ability to incrementally update their knowledge by recognizing and adapting to novel observations. In particular, humans characterize concepts via exclusive (unique) sets of essential features, which are used for both recognizing known classes and identifying novelty. Inspired by natural learners, we introduce a Sparse High-level-Exclusive, Low-level-Shared feature representation (SHELS) that simultaneously encourages learning exclusive sets of high-level features and essential, shared low-level features. The exclusivity of the high-level features enables the DNN to automatically detect out-of-distribution (OOD) data, while the efficient use of capacity via sparse low-level features permits accommodating new knowledge. The resulting approach uses OOD detection to perform class-incremental continual learning without known class boundaries. We show that using SHELS for novelty detection results in statistically significant improvements over state-of-the-art OOD detection approaches over a variety of benchmark datasets. Further, we demonstrate that the SHELS model mitigates catastrophic forgetting in a class-incremental learning setting, enabling a combined novelty detection and accommodation framework that supports learning in open-world settings.},
pages = {1065-1085},
video = {https://youtu.be/SrvXmGzl2qc}
}
@InProceedings{cheng2022,
title = {Trustworthiness Evaluation and Trust-Aware Design of CNN Architectures},
author = {Cheng, Mingxi and Sun, Tingyang and Nazarian, Shahin and Bogdan, Paul},
abstract = {Convolutional neural networks (CNNs) are known to be effective tools in many deep learning application areas. Despite CNNs' good performance in terms of classical evaluation metrics such as accuracy and loss, quantifying and ensuring a high degree of trustworthiness of such models remains an unsolved problem, raising questions in applications where trust is an important factor. In this work, we propose a framework to evaluate the trustworthiness of CNNs. Towards this end, we develop a trust-based pooling layer for CNNs to achieve higher accuracy and trustworthiness in applications with noise in input features. We further propose TrustCNets consisting of trustworthiness-aware CNN building blocks, i.e., one or more conv layers followed by a trust-based pooling layer. TrustCNets can be stacked together as a trust-aware CNN architecture or be plugged into deep learning architectures to improve performance. In our experiments, we evaluate the trustworthiness of popular CNN building blocks and demonstrate the performance of our TrustCNet empirically with multiple datasets.},
pages = {1086-1102},
video = {https://youtu.be/wnG46KDvsPg}
}
@InProceedings{garbacea2022,
title = {Adapting Pre-trained Language Models to Low-Resource Text Simplification: The Path Matters},
author = {Garbacea, Cristina and Mei, Qiaozhu},
abstract = {We frame the problem of text simplification from a task and domain adaptation perspective, where neural language models are pre-trained on large-scale corpora and then adapted to new tasks in different domains through limited training examples. We investigate the performance of two popular vehicles of task and domain adaptation: meta-learning and transfer learning (in particular fine-tuning), in the context of low-resource text simplification that involves a diversity of tasks and domains. We find that when directly adapting a Web-scale pre-trained language model to low-resource text simplification tasks, fine-tuning based methods present a competitive advantage over meta-learning approaches. Surprisingly, adding an intermediate stop in the adaptation path between the source and target, an auxiliary dataset and task that allow for the decomposition of the adaptation process into multiple steps, significantly increases the performance of the target task. The performance is however sensitive to the selection and ordering of the adaptation strategy (task adaptation vs. domain adaptation) in the two steps. When such an intermediate dataset is not available, one can build a pseudostop using the target domain/task itself. Our extensive analysis serves as a preliminary step towards bridging these two popular paradigms of few-shot adaptive learning and towards developing more structured solutions to task/domain adaptation in a novel setting.},
pages = {1103-1119},
video = {https://youtu.be/8uKBL7Om1JY}
}
@InProceedings{daniels2022,
title = {Model-Free Generative Replay for Lifelong Reinforcement Learning: Application to Starcraft-2},
author = {Daniels, Zachary Alan and Raghavan, Aswin and Hostetler, Jesse and Rahman, Abrar and Sur, Indranil and Piacentino, Michael and Divakaran, Ajay and Corizzo, Roberto and Faber, Kamil and Japkowicz, Nathalie and Baron, Michael and Smith, James and Joshi, Sahana Pramod and Kira, Zsolt and Taylor, Cameron Ethan and Gurbuz, Mustafa Burak and Dovrolis, Constantine and Hayes, Tyler L. and Kanan, Christopher and Gallardo, Jhair},
abstract = {One approach to meet the challenges of deep lifelong reinforcement learning (LRL) is careful management of the agent's learning experiences, in order to learn (without forgetting) and build internal meta-models (of the tasks, environments, agents, and world). Generative replay (GR) is a biologically-inspired replay mechanism that augments learning experiences with self-labelled examples drawn from an internal generative model that is updated over time. In this paper, we present a version of GR for LRL that satisfies two desiderata: (a) Introspective density modelling of the latent representations of policies learned using deep RL, and (b) Model-free end-to-end learning. The first property avoids the challenges of density modelling of complex high-dimensional perceptual inputs, whereas policy learning using deep RL works well with such perceptual inputs. The second property avoids the challenges of learning temporal dynamics and reward functions from few learning experiences with sparse rewards. In this work, we study three deep learning architectures for model-free GR, starting from a naive GR and adding ingredients to achieve (a) and (b). We evaluate our proposed algorithms on three different scenarios comprising tasks from the StarCraft2 and Minigrid domains. We report several key findings showing the impact of the design choices on quantitative metrics that include transfer learning, generalization to unseen tasks, fast adaptation after task change, performance comparable to a task expert, and minimizing catastrophic forgetting. We observe that our GR prevents drift in the features-to-action mapping from the latent vector space of a deep actor-critic agent. We also show improvements in established lifelong learning metrics. We find that the introduction of a small random replay buffer is needed to significantly increase the stability of training, when used in conjunction with the replay buffer and the generated replay buffer. Overall, we find that hidden replay (a well-known architecture for class-incremental classification) is the most promising approach that pushes the state-of-the-art in GR for LRL.},
pages = {1120-1145},
video = {https://youtu.be/o5h5vRn2_3s}
}
@InProceedings{lavington2022,
title = {Improved Policy Optimization for Online Imitation Learning},
author = {Lavington, Jonathan Wilder and Vaswani, Sharan and Schmidt, Mark},
abstract = {We consider online imitation learning (OIL), where the task is to find a policy that imitates the behavior of an expert via active interaction with the environment. We aim to bridge the gap between the theory and practice of policy optimization algorithms for OIL by analyzing one of the most popular OIL algorithms, DAGGER. Specifically, if the class of policies is sufficiently expressive to contain the expert policy, we prove that DAGGER achieves constant regret. Unlike previous bounds that require the losses to be strongly-convex, our result only requires the weaker assumption that the losses be strongly-convex with respect to the policy's sufficient statistics (not its parameterization). In order to ensure convergence for a wider class of policies and losses, we augment DAGGER with an additional regularization term. In particular, we propose a variant of Follow-the-Regularized-Leader (FTRL) and its adaptive variant for OIL and develop a memory-efficient implementation, which matches the memory requirements of FTL. Assuming that the loss functions are smooth and convex with respect to the parameters of the policy, we also prove that FTRL achieves constant regret for any sufficiently expressive policy class, while retaining $O(\sqrt{T})$ regret in the worst-case. We demonstrate the effectiveness of these algorithms with experiments on synthetic and high-dimensional control tasks.},
pages = {1146-1173},
video = {https://youtu.be/4RqMkFxEL7Q}
}
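% lavington2022 analyses DAGGER and FTRL-style variants for online imitation learning. As
% background, a schematic Python sketch of the basic DAGGER loop (roll out the current
% policy, relabel visited states with expert actions, aggregate, refit); env_reset, env_step,
% expert_action and fit_policy are hypothetical placeholders, and the paper's regularized
% (FTRL) fitting step is not shown.
%
%   import numpy as np
%
%   def dagger(env_reset, env_step, expert_action, fit_policy, horizon, n_rounds):
%       states, labels = [], []
%       policy = expert_action                      # bootstrap (e.g. behaviour cloning)
%       for _ in range(n_rounds):
%           s = env_reset()
%           for _ in range(horizon):
%               a = policy(s)                       # act with the current learner
%               states.append(s)
%               labels.append(expert_action(s))     # but store the expert's label
%               s = env_step(s, a)
%           policy = fit_policy(np.array(states), np.array(labels))   # refit on the aggregate
%       return policy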
@InProceedings{smith2022,
title = {Learning Skills Diverse in Value-Relevant Features},
author = {Smith, Matthew J. A. and Luketina, Jelena and Hartikainen, Kristian and Igl, Maximilian and Whiteson, Shimon},
abstract = {Behavioural abstraction via temporally extended actions is vital to solving large-scale reinforcement learning problems. Skills structure exploration, speed up credit assignment, and can be used in transfer learning. However, such abstraction is often difficult or expensive for experts to craft by hand. Unsupervised information-theoretic methods (Gregor et al., 2016; Eysenbach et al., 2019; Sharma et al., 2020) address this problem by learning a set of skills without using environment rewards, typically by maximizing discriminability of the states visited by individual skills. However, since only some features of the state matter in complex environments, these methods often discover behaviours that are trivially diverse, learning skills that are not helpful for downstream tasks. To overcome this limitation, we propose a method for learning skills that only control features important to the tasks of interest. First, by training on a small set of source tasks, the agent learns which features are most relevant. Then, the discriminability objective for an unsupervised information-theoretic method is defined for this learned feature space. This allows the construction of sets of diverse and useful skills that can control the most important features. Experimental results in continuous control domains validate our method, demonstrating that it yields skills that substantially improve learning on downstream locomotion tasks with sparse rewards.},
pages = {1174-1194},
video = {https://youtu.be/FAF2RJnnMjY}
}
@InProceedings{bhat2022b,
title = {Consistency is the Key to Further Mitigating Catastrophic Forgetting in Continual Learning},
author = {Bhat, Prashant Shivaram and Zonooz, Bahram and Arani, Elahe},
abstract = {Deep neural networks struggle to continually learn multiple sequential tasks due to catastrophic forgetting of previously learned tasks. Rehearsal-based methods which explicitly store previous task samples in the buffer and interleave them with the current task samples have proven to be the most effective in mitigating forgetting. However, Experience Replay (ER) does not perform well under low-buffer regimes and longer task sequences as its performance is commensurate with the buffer size. Consistency in predictions of soft targets can assist ER in better preserving information pertaining to previous tasks, as soft targets capture the rich similarity structure of the data. Therefore, we examine the role of consistency regularization in the ER framework under various continual learning scenarios. We also propose to cast consistency regularization as a self-supervised pretext task thereby enabling the use of a wide variety of self-supervised learning methods as regularizers. While simultaneously enhancing model calibration and robustness to natural corruptions, regularizing consistency in predictions results in less forgetting across all continual learning scenarios. Among the different families of regularizers, we find that stricter consistency constraints preserve previous task information in ER better.},
pages = {1195-1212},
video = {https://youtu.be/Hqag-gq8JU4}
}
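% A compact PyTorch-style sketch of consistency-regularized experience replay as described
% in bhat2022b: cross-entropy on current and replayed samples plus a consistency term that
% keeps current soft predictions on buffered samples close to the soft targets stored when
% the samples entered the buffer. The MSE form is one possible choice among the regularizer
% families the paper studies; buffer management is omitted.
%
%   import torch.nn.functional as F
%
%   def er_consistency_loss(model, x_cur, y_cur, buf_x, buf_y, buf_soft, alpha=0.5):
%       logits_cur, logits_buf = model(x_cur), model(buf_x)
%       ce = F.cross_entropy(logits_cur, y_cur) + F.cross_entropy(logits_buf, buf_y)
%       consistency = F.mse_loss(F.softmax(logits_buf, dim=1), buf_soft)
%       return ce + alpha * consistency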
@InProceedings{tziafas2022,
title = {Sim-To-Real Transfer of Visual Grounding for Human-Aided Ambiguity Resolution},
author = {Tziafas, Georgios and Schomaker, Lambert and Kasaei, Hamidreza},
abstract = {Service robots should be able to interact naturally with non-expert human users, not only to help them in various tasks but also to receive guidance in order to resolve ambiguities that might be present in the instruction. We consider the task of visual grounding, where the agent segments an object from a crowded scene given a natural language description. Modern holistic approaches to visual grounding usually ignore language structure and struggle to cover generic domains, therefore relying heavily on large datasets. Additionally, their transfer performance in RGB-D datasets suffers due to high visual discrepancy between the benchmark and the target domains. Modular approaches marry learning with domain modeling and exploit the compositional nature of language to decouple visual representation from language parsing, but either rely on external parsers or are trained in an end-to-end fashion due to the lack of strong supervision. In this work, we seek to tackle these limitations by introducing a fully decoupled modular framework for compositional visual grounding of entities, attributes, and spatial relations. We exploit rich scene graph annotations generated in a synthetic domain and train each module independently. Our approach is evaluated both in simulation and in two real RGB-D scene datasets. Experimental results show that the decoupled nature of our framework allows for easy integration with domain adaptation approaches for Sim-To-Real visual recognition, offering a data-efficient, robust, and interpretable solution to visual grounding in robotic applications.},
pages = {1213-1230},
video = {https://youtu.be/QbMkvwuTaqE}
}
@InProceedings{rusu2022,
title = {Probing Transfer in Deep Reinforcement Learning without Task Engineering},
author = {Rusu, Andrei Alex and Flennerhag, Sebastian and Rao, Dushyant and Pascanu, Razvan and Hadsell, Raia},
abstract = {We evaluate the use of original game curricula supported by the Atari 2600 console as a heterogeneous transfer benchmark for deep reinforcement learning agents. Game designers created curricula using combinations of several discrete modifications to the basic versions of games such as Space Invaders, Breakout and Freeway, making them progressively more challenging for human players. By formally organising these modifications into several factors of variation, we are able to show that Analyses of Variance (ANOVA) are a potent tool for studying the effects of human-relevant domain changes on the learning and transfer performance of a deep reinforcement learning agent. Since no manual task engineering is needed on our part, leveraging the original multi-factorial design avoids the pitfalls of unintentionally biasing the experimental setup. We find that game design factors have a large and statistically significant impact on an agent's ability to learn, and so do their combinatorial interactions. Furthermore, we show that zero-shot transfer from the basic games to their respective variations is possible, but the variance in performance is also largely explained by interactions between factors. As such, we argue that Atari game curricula offer a challenging benchmark for transfer learning in RL, that can help the community better understand the generalisation capabilities of RL agents along dimensions which meaningfully impact human generalisation performance. As a start, we report that value-function finetuning of regularly trained agents achieves positive transfer in a majority of cases, but significant headroom for algorithmic innovation remains. We conclude with the observation that selective transfer from multiple variants could further improve performance.},
pages = {1231-1254},
video = {https://youtu.be/ImvU4Yh6j20}
}