@phdthesis{oai:soka.repo.nii.ac.jp:00040967,
  author = {Barbosa, Mormille Luiz Henrique},
  month  = {2023-03-23},
  note   = {In recent years, the Transformer has achieved remarkable results in computer vision tasks, matching or even surpassing those of convolutional neural networks (CNNs). However, unlike CNNs, vision transformers lack strong inductive biases and, to achieve state-of-the-art results, rely on large architectures and extensive pre-training on tens of millions of images. Introducing appropriate inductive biases to vision transformers can lead to better convergence and generalization in settings with less training data. This work presents a novel way to introduce inductive biases to vision transformers: self-attention regularization. Two different methods of self-attention regularization were devised. Furthermore, this work proposes ARViT, a novel vision transformer architecture in which both self-attention regularization methods are deployed. The experimental results demonstrate that self-attention regularization leads to better convergence and generalization, especially for models pre-trained on mid-size datasets.},
  school = {Soka University},
  title  = {Vision Transformers with Inductive Bias Introduced through Self-Attention Regularization},
  year   = {2023}
}