diff --git a/image-inpainting/results/testset/tikaiz-21.9691.npz b/image-inpainting/results/testset/tikaiz-21.9691.npz new file mode 100644 index 0000000..4654541 Binary files /dev/null and b/image-inpainting/results/testset/tikaiz-21.9691.npz differ diff --git a/image-inpainting/src/__pycache__/architecture.cpython-313.pyc b/image-inpainting/src/__pycache__/architecture.cpython-313.pyc index 5295f71..0f66f1e 100644 Binary files a/image-inpainting/src/__pycache__/architecture.cpython-313.pyc and b/image-inpainting/src/__pycache__/architecture.cpython-313.pyc differ diff --git a/image-inpainting/src/__pycache__/architecture.cpython-314.pyc b/image-inpainting/src/__pycache__/architecture.cpython-314.pyc index 5abafe7..a9bef6f 100644 Binary files a/image-inpainting/src/__pycache__/architecture.cpython-314.pyc and b/image-inpainting/src/__pycache__/architecture.cpython-314.pyc differ diff --git a/image-inpainting/src/__pycache__/datasets.cpython-313.pyc b/image-inpainting/src/__pycache__/datasets.cpython-313.pyc index c101f32..abbc066 100644 Binary files a/image-inpainting/src/__pycache__/datasets.cpython-313.pyc and b/image-inpainting/src/__pycache__/datasets.cpython-313.pyc differ diff --git a/image-inpainting/src/__pycache__/datasets.cpython-314.pyc b/image-inpainting/src/__pycache__/datasets.cpython-314.pyc index b6f25ff..0ec1a79 100644 Binary files a/image-inpainting/src/__pycache__/datasets.cpython-314.pyc and b/image-inpainting/src/__pycache__/datasets.cpython-314.pyc differ diff --git a/image-inpainting/src/__pycache__/train.cpython-313.pyc b/image-inpainting/src/__pycache__/train.cpython-313.pyc index 3b0020b..9e1c099 100644 Binary files a/image-inpainting/src/__pycache__/train.cpython-313.pyc and b/image-inpainting/src/__pycache__/train.cpython-313.pyc differ diff --git a/image-inpainting/src/__pycache__/train.cpython-314.pyc b/image-inpainting/src/__pycache__/train.cpython-314.pyc index 4d47be3..09c3a83 100644 Binary files a/image-inpainting/src/__pycache__/train.cpython-314.pyc and b/image-inpainting/src/__pycache__/train.cpython-314.pyc differ diff --git a/image-inpainting/src/__pycache__/utils.cpython-313.pyc b/image-inpainting/src/__pycache__/utils.cpython-313.pyc index 251d5ec..dc2f6aa 100644 Binary files a/image-inpainting/src/__pycache__/utils.cpython-313.pyc and b/image-inpainting/src/__pycache__/utils.cpython-313.pyc differ diff --git a/image-inpainting/src/architecture.py b/image-inpainting/src/architecture.py index 76a6e1e..8234087 100644 --- a/image-inpainting/src/architecture.py +++ b/image-inpainting/src/architecture.py @@ -15,9 +15,11 @@ def init_weights(m): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d)): + if m.weight is not None: + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) class ChannelAttention(nn.Module): @@ -69,35 +71,36 @@ class CBAM(nn.Module): class ConvBlock(nn.Module): - """Convolutional block with Conv2d -> BatchNorm -> LeakyReLU""" - def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, dropout=0.0): + """Convolutional block with Conv2d -> InstanceNorm2d -> GELU""" + def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, dropout=0.0, dilation=1): super().__init__() - self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding) - self.bn = nn.BatchNorm2d(out_channels) - self.relu = nn.LeakyReLU(0.1, inplace=True) + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation) + # InstanceNorm is preferred for style/inpainting tasks + self.bn = nn.InstanceNorm2d(out_channels, affine=True) + self.act = nn.GELU() self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity() def forward(self, x): - return self.dropout(self.relu(self.bn(self.conv(x)))) + return self.dropout(self.act(self.bn(self.conv(x)))) class ResidualConvBlock(nn.Module): """Residual convolutional block for better gradient flow""" - def __init__(self, channels, dropout=0.0): + def __init__(self, channels, dropout=0.0, dilation=1): super().__init__() - self.conv1 = nn.Conv2d(channels, channels, 3, padding=1) - self.bn1 = nn.BatchNorm2d(channels) - self.conv2 = nn.Conv2d(channels, channels, 3, padding=1) - self.bn2 = nn.BatchNorm2d(channels) - self.relu = nn.LeakyReLU(0.1, inplace=True) + self.conv1 = nn.Conv2d(channels, channels, 3, padding=dilation, dilation=dilation) + self.bn1 = nn.InstanceNorm2d(channels, affine=True) + self.conv2 = nn.Conv2d(channels, channels, 3, padding=dilation, dilation=dilation) + self.bn2 = nn.InstanceNorm2d(channels, affine=True) + self.act = nn.GELU() self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity() def forward(self, x): residual = x - out = self.relu(self.bn1(self.conv1(x))) + out = self.act(self.bn1(self.conv1(x))) out = self.dropout(out) out = self.bn2(self.conv2(out)) out = out + residual - return self.relu(out) + return self.act(out) class DownBlock(nn.Module): @@ -161,9 +164,9 @@ class MyModel(nn.Module): # Bottleneck with multiple residual blocks self.bottleneck = nn.Sequential( ConvBlock(base_channels * 16, base_channels * 16, dropout=dropout), - ResidualConvBlock(base_channels * 16, dropout=dropout), - ResidualConvBlock(base_channels * 16, dropout=dropout), - ResidualConvBlock(base_channels * 16, dropout=dropout), + ResidualConvBlock(base_channels * 16, dropout=dropout, dilation=2), + ResidualConvBlock(base_channels * 16, dropout=dropout, dilation=4), + ResidualConvBlock(base_channels * 16, dropout=dropout, dilation=8), CBAM(base_channels * 16) ) @@ -183,7 +186,7 @@ class MyModel(nn.Module): # Output layer with smooth transition self.output = nn.Sequential( nn.Conv2d(base_channels, base_channels // 2, kernel_size=3, padding=1), - nn.LeakyReLU(0.1, inplace=True), + nn.GELU(), nn.Conv2d(base_channels // 2, 3, kernel_size=1), nn.Sigmoid() # Ensure output is in [0, 1] range ) diff --git a/image-inpainting/src/datasets.py b/image-inpainting/src/datasets.py index d5e74eb..45a7f6e 100644 --- a/image-inpainting/src/datasets.py +++ b/image-inpainting/src/datasets.py @@ -26,11 +26,21 @@ def create_arrays_from_image(image_array: np.ndarray, offset: tuple, spacing: tu return image_array, known_array -def resize(img: Image): - resize_transforms = transforms.Compose([ +def resize(img: Image, augment: bool = False): + transforms_list = [ transforms.Resize((IMAGE_DIMENSION, IMAGE_DIMENSION)), transforms.CenterCrop((IMAGE_DIMENSION, IMAGE_DIMENSION)) - ]) + ] + + if augment: + transforms_list = [ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05), + transforms.RandomRotation(10), + ] + transforms_list + + resize_transforms = transforms.Compose(transforms_list) return resize_transforms(img) def preprocess(input_array: np.ndarray): input_array = np.asarray(input_array, dtype=np.float32) / 255.0 @@ -41,8 +51,9 @@ class ImageDataset(torch.utils.data.Dataset): Dataset class for loading images from a folder """ - def __init__(self, datafolder: str): + def __init__(self, datafolder: str, augment: bool = False): self.imagefiles = sorted(glob.glob(os.path.join(datafolder,"**","*.jpg"),recursive=True)) + self.augment = augment def __len__(self): return len(self.imagefiles) @@ -51,7 +62,7 @@ class ImageDataset(torch.utils.data.Dataset): index = int(idx) image = Image.open(self.imagefiles[index]) - image = np.asarray(resize(image)) + image = np.asarray(resize(image, self.augment)) image = preprocess(image) spacing_x = random.randint(2,6) spacing_y = random.randint(2,6) diff --git a/image-inpainting/src/train.py b/image-inpainting/src/train.py index 10bf917..8a937da 100644 --- a/image-inpainting/src/train.py +++ b/image-inpainting/src/train.py @@ -84,16 +84,21 @@ def train(seed, testset_ratio, validset_ratio, data_path, results_path, early_st plotpath = os.path.join(results_path, "plots") os.makedirs(plotpath, exist_ok=True) - image_dataset = datasets.ImageDataset(datafolder=data_path) + image_dataset = datasets.ImageDataset(datafolder=data_path, augment=False) n_total = len(image_dataset) n_test = int(n_total * testset_ratio) n_valid = int(n_total * validset_ratio) n_train = n_total - n_test - n_valid indices = np.random.permutation(n_total) - dataset_train = Subset(image_dataset, indices=indices[0:n_train]) - dataset_valid = Subset(image_dataset, indices=indices[n_train:n_train + n_valid]) - dataset_test = Subset(image_dataset, indices=indices[n_train + n_valid:n_total]) + + # Create datasets with and without augmentation + train_dataset_source = datasets.ImageDataset(datafolder=data_path, augment=True) + val_test_dataset_source = datasets.ImageDataset(datafolder=data_path, augment=False) + + dataset_train = Subset(train_dataset_source, indices=indices[0:n_train]) + dataset_valid = Subset(val_test_dataset_source, indices=indices[n_train:n_train + n_valid]) + dataset_test = Subset(val_test_dataset_source, indices=indices[n_train + n_valid:n_total]) assert len(image_dataset) == len(dataset_train) + len(dataset_test) + len(dataset_valid)