added prediction, 16.6824

2026-01-26 14:01:16 +01:00
parent c00089a97d
commit 1f859a3d71
8 changed files with 107 additions and 58 deletions
--- a/image-inpainting/results/testset/tikaiz-16.6824.npz
+++ b/image-inpainting/results/testset/tikaiz-16.6824.npz
--- a/image-inpainting/src/pycache/architecture.cpython-313.pyc
+++ b/image-inpainting/src/pycache/architecture.cpython-313.pyc
--- a/image-inpainting/src/pycache/datasets.cpython-313.pyc
+++ b/image-inpainting/src/pycache/datasets.cpython-313.pyc
--- a/image-inpainting/src/pycache/train.cpython-313.pyc
+++ b/image-inpainting/src/pycache/train.cpython-313.pyc
--- a/image-inpainting/src/architecture.py
+++ b/image-inpainting/src/architecture.py
@@ -88,9 +88,16 @@ class EfficientAttention(nn.Module):

 class ConvBlock(nn.Module):
    """Convolutional block with Conv2d -> BatchNorm -> LeakyReLU"""
-    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, dilation=1, dropout=0.0):
+    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, dilation=1, dropout=0.0, separable=False):
        super().__init__()
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation)
+        if separable and in_channels > 1:
+            # Depthwise separable convolution for efficiency
+            self.conv = nn.Sequential(
+                nn.Conv2d(in_channels, in_channels, kernel_size, padding=padding, dilation=dilation, groups=in_channels),
+                nn.Conv2d(in_channels, out_channels, 1)
+            )
+        else:
+            self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(0.2, inplace=True)
        self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity()
@@ -142,30 +149,39 @@ class ResidualConvBlock(nn.Module):


 class DownBlock(nn.Module):
-    """Enhanced downsampling block with residual connections"""
-    def __init__(self, in_channels, out_channels, dropout=0.1, use_attention=True):
+    """Enhanced downsampling block with dense and residual connections"""
+    def __init__(self, in_channels, out_channels, dropout=0.1, use_attention=True, use_dense=False):
        super().__init__()
-        self.conv1 = ConvBlock(in_channels, out_channels, dropout=dropout)
-        self.residual = ResidualConvBlock(out_channels, dropout=dropout)
+        self.conv1 = ConvBlock(in_channels, out_channels, dropout=dropout, separable=True)
+        self.conv2 = ConvBlock(out_channels, out_channels, dropout=dropout)
+        if use_dense:
+            self.dense = DenseBlock(out_channels, growth_rate=8, num_layers=2, dropout=dropout)
+        else:
+            self.dense = ResidualConvBlock(out_channels, dropout=dropout)
        self.attention = EfficientAttention(out_channels) if use_attention else nn.Identity()
        self.pool = nn.MaxPool2d(2)
    
    def forward(self, x):
        x = self.conv1(x)
-        x = self.residual(x)
+        x = self.conv2(x)
+        x = self.dense(x)
        skip = self.attention(x)
        return self.pool(skip), skip

 class UpBlock(nn.Module):
    """Enhanced upsampling block with gated skip connections"""
-    def __init__(self, in_channels, out_channels, dropout=0.1, use_attention=True):
+    def __init__(self, in_channels, out_channels, dropout=0.1, use_attention=True, use_dense=False):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        # Skip connection has in_channels, upsampled has out_channels
        self.gated_skip = GatedSkipConnection(out_channels, in_channels)
        # After gated skip: out_channels
-        self.conv1 = ConvBlock(out_channels, out_channels, dropout=dropout)
-        self.residual = ResidualConvBlock(out_channels, dropout=dropout)
+        self.conv1 = ConvBlock(out_channels, out_channels, dropout=dropout, separable=True)
+        self.conv2 = ConvBlock(out_channels, out_channels, dropout=dropout)
+        if use_dense:
+            self.dense = DenseBlock(out_channels, growth_rate=8, num_layers=2, dropout=dropout)
+        else:
+            self.dense = ResidualConvBlock(out_channels, dropout=dropout)
        self.attention = EfficientAttention(out_channels) if use_attention else nn.Identity()
    
    def forward(self, x, skip):
@@ -175,7 +191,8 @@ class UpBlock(nn.Module):
            x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
        x = self.gated_skip(x, skip)
        x = self.conv1(x)
-        x = self.residual(x)
+        x = self.conv2(x)
+        x = self.dense(x)
        x = self.attention(x)
        return x

@@ -205,28 +222,30 @@ class MyModel(nn.Module):
            nn.LeakyReLU(0.2, inplace=True)
        )
        
-        # Encoder with attention on deeper layers only
-        self.down1 = DownBlock(base_channels, base_channels * 2, dropout=dropout, use_attention=False)
-        self.down2 = DownBlock(base_channels * 2, base_channels * 4, dropout=dropout, use_attention=True)
-        self.down3 = DownBlock(base_channels * 4, base_channels * 8, dropout=dropout, use_attention=True)
+        # Encoder with progressive feature extraction
+        self.down1 = DownBlock(base_channels, base_channels * 2, dropout=dropout, use_attention=False, use_dense=False)
+        self.down2 = DownBlock(base_channels * 2, base_channels * 4, dropout=dropout, use_attention=True, use_dense=True)
+        self.down3 = DownBlock(base_channels * 4, base_channels * 8, dropout=dropout, use_attention=True, use_dense=True)
        
-        # Enhanced bottleneck with multi-scale features
+        # Enhanced bottleneck with multi-scale features and dense connections
        self.bottleneck = nn.Sequential(
            ConvBlock(base_channels * 8, base_channels * 8, dropout=dropout),
+            DenseBlock(base_channels * 8, growth_rate=10, num_layers=3, dropout=dropout),
            ConvBlock(base_channels * 8, base_channels * 8, dilation=2, padding=2, dropout=dropout),
            ResidualConvBlock(base_channels * 8, dropout=dropout),
            EfficientAttention(base_channels * 8)
        )
        
-        # Decoder with attention on deeper layers
-        self.up1 = UpBlock(base_channels * 8, base_channels * 4, dropout=dropout, use_attention=True)
-        self.up2 = UpBlock(base_channels * 4, base_channels * 2, dropout=dropout, use_attention=True)
-        self.up3 = UpBlock(base_channels * 2, base_channels, dropout=dropout, use_attention=False)
+        # Decoder with progressive reconstruction
+        self.up1 = UpBlock(base_channels * 8, base_channels * 4, dropout=dropout, use_attention=True, use_dense=True)
+        self.up2 = UpBlock(base_channels * 4, base_channels * 2, dropout=dropout, use_attention=True, use_dense=True)
+        self.up3 = UpBlock(base_channels * 2, base_channels, dropout=dropout, use_attention=False, use_dense=False)
        
-        # Multi-scale feature fusion
+        # Multi-scale feature fusion with dense connections
        self.multiscale_fusion = nn.Sequential(
            ConvBlock(base_channels * 2, base_channels),
-            ResidualConvBlock(base_channels, dropout=dropout//2)
+            DenseBlock(base_channels, growth_rate=8, num_layers=2, dropout=dropout / 2),  # true division: `//` floors a fractional rate to 0.0, silently disabling dropout
+            ConvBlock(base_channels, base_channels)
        )
        
        # Output with residual connection to input
--- a/image-inpainting/src/datasets.py
+++ b/image-inpainting/src/datasets.py
@@ -37,27 +37,35 @@ def preprocess(input_array: np.ndarray):
    input_array = np.asarray(input_array, dtype=np.float32) / 255.0
    return input_array

-def augment_image(img: Image, strength: float = 0.5) -> Image:
-    """Apply fast data augmentation with controlled strength"""
+def augment_image(img: Image.Image, strength: float = 0.7) -> Image.Image:
+    """Randomly flip, rotate by a right angle and colour-jitter `img`; `strength` scales only the colour jitter."""
    # Random horizontal flip
    if random.random() > 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
    
-    # Random rotation (90, 180, 270 degrees) - less frequent for speed
-    if random.random() > 0.6:
+    # Random vertical flip
+    if random.random() > 0.5:
+        img = img.transpose(Image.FLIP_TOP_BOTTOM)
+    
+    # Random rotation (90, 180, 270 degrees); NOTE(review): rotate() keeps the input canvas size, so 90/270 clip non-square images - confirm inputs are square
+    if random.random() > 0.5:
        angle = random.choice([90, 180, 270])
        img = img.rotate(angle)
    
-    # Simplified color jitter - only one transformation per image for speed
+    # Color jitter: at most one of brightness / contrast / saturation per image (25% chance each, 25% none)
    rand = random.random()
-    if rand > 0.66:
+    if rand > 0.75:
        # Brightness
-        factor = 1.0 + random.uniform(-0.15, 0.15) * strength
+        factor = 1.0 + random.uniform(-0.2, 0.2) * strength
        img = ImageEnhance.Brightness(img).enhance(factor)
-    elif rand > 0.33:
+    elif rand > 0.5:
        # Contrast
-        factor = 1.0 + random.uniform(-0.15, 0.15) * strength
+        factor = 1.0 + random.uniform(-0.2, 0.2) * strength
        img = ImageEnhance.Contrast(img).enhance(factor)
+    elif rand > 0.25:
+        # Saturation
+        factor = 1.0 + random.uniform(-0.15, 0.15) * strength
+        img = ImageEnhance.Color(img).enhance(factor)
    
    return img

@@ -66,7 +74,7 @@ class ImageDataset(torch.utils.data.Dataset):
    Dataset class for loading images from a folder with augmentation support
    """

-    def __init__(self, datafolder: str, augment: bool = True, augment_strength: float = 0.5):
+    def __init__(self, datafolder: str, augment: bool = True, augment_strength: float = 0.7):
        self.imagefiles = sorted(glob.glob(os.path.join(datafolder,"**","*.jpg"),recursive=True))
        self.augment = augment
        self.augment_strength = augment_strength
--- a/image-inpainting/src/main.py
+++ b/image-inpainting/src/main.py
@@ -24,22 +24,22 @@ if __name__ == '__main__':
    config_dict['results_path'] = os.path.join(project_root, "results")
    config_dict['data_path'] = os.path.join(project_root, "data", "dataset")
    config_dict['device'] = None
-    config_dict['learningrate'] = 1e-3  # Higher max LR for OneCycleLR
-    config_dict['weight_decay'] = 1e-5  # Lower for faster learning
-    config_dict['n_updates'] = 3500  # Reduced for fast training
-    config_dict['batchsize'] = 96  # Larger batch for speed
-    config_dict['early_stopping_patience'] = 3  # Adjusted patience
+    config_dict['learningrate'] = 8e-4  # Optimized for long training
+    config_dict['weight_decay'] = 1e-4  # Better regularization for long training
+    config_dict['n_updates'] = 30000  # Full day of training (~24 hours)
+    config_dict['batchsize'] = 64  # Balanced for memory and quality
+    config_dict['early_stopping_patience'] = 15  # More patience for better convergence
    config_dict['use_wandb'] = False

-    config_dict['print_train_stats_at'] = 10
-    config_dict['print_stats_at'] = 100
+    config_dict['print_train_stats_at'] = 50
+    config_dict['print_stats_at'] = 200
    config_dict['plot_at'] = 500
-    config_dict['validate_at'] = 500  # More frequent validation
+    config_dict['validate_at'] = 500  # Regular validation

    network_config = {
        'n_in_channels': 4,
-        'base_channels': 40,  # Optimized for memory efficiency
-        'dropout': 0.08  # Fine-tuned dropout
+        'base_channels': 44,  # Optimal capacity for 16GB VRAM
+        'dropout': 0.12  # Higher dropout for longer training
    }
    
    config_dict['network_config'] = network_config
--- a/image-inpainting/src/train.py
+++ b/image-inpainting/src/train.py
@@ -15,7 +15,6 @@ import os

 from torch.utils.data import DataLoader
 from torch.utils.data import Subset
-from torch.optim.lr_scheduler import OneCycleLR

 import wandb

@@ -44,6 +43,10 @@ def train(seed, testset_ratio, validset_ratio, data_path, results_path, early_st

    if isinstance(device, str):
        device = torch.device(device)
+    
+    # Enable mixed precision training for memory efficiency
+    use_amp = torch.cuda.is_available()
+    scaler = torch.amp.GradScaler('cuda') if use_amp else None

    if use_wandb:
        wandb.login()
@@ -93,11 +96,12 @@ def train(seed, testset_ratio, validset_ratio, data_path, results_path, early_st
    mse_loss = torch.nn.MSELoss()  # Keep for evaluation

    # defining the optimizer with AdamW for better weight decay handling
-    optimizer = torch.optim.AdamW(network.parameters(), lr=learningrate, weight_decay=weight_decay, betas=(0.9, 0.99))
+    optimizer = torch.optim.AdamW(network.parameters(), lr=learningrate, weight_decay=weight_decay, betas=(0.9, 0.999))
    
-    # OneCycleLR for fast convergence - ramps up then down over entire training
-    scheduler = OneCycleLR(optimizer, max_lr=learningrate, total_steps=n_updates, 
-                          pct_start=0.3, anneal_strategy='cos', div_factor=25.0, final_div_factor=1e4)
+    # Cosine annealing with warm restarts for long training
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
+        optimizer, T_0=max(1, n_updates // 4), T_mult=1, eta_min=learningrate/100  # T_0 must be a positive int; guards runs with n_updates < 4
+    )

    if use_wandb:
        wandb.watch(network, mse_loss, log="all", log_freq=10)
@@ -122,17 +126,31 @@ def train(seed, testset_ratio, validset_ratio, data_path, results_path, early_st

            optimizer.zero_grad()

-            output = network(input)
-
-            loss = rmse_loss(output, target)
-
-            loss.backward()
+            # Mixed precision training for memory efficiency
+            if use_amp:
+                with torch.amp.autocast('cuda'):
+                    output = network(input)
+                    loss = rmse_loss(output, target)
+                
+                scaler.scale(loss).backward()
+                
+                # Gradient clipping for training stability
+                scaler.unscale_(optimizer)
+                torch.nn.utils.clip_grad_norm_(network.parameters(), max_norm=1.0)
+                
+                scaler.step(optimizer)
+                scaler.update()
+            else:
+                output = network(input)
+                loss = rmse_loss(output, target)
+                loss.backward()
+                
+                # Gradient clipping for training stability
+                torch.nn.utils.clip_grad_norm_(network.parameters(), max_norm=1.0)
+                
+                optimizer.step()
            
-            # Gradient clipping for training stability
-            torch.nn.utils.clip_grad_norm_(network.parameters(), max_norm=1.0)
-
-            optimizer.step()
-            scheduler.step()  # OneCycleLR steps once per optimizer step
+            scheduler.step()

            loss_list.append(loss.item())

@@ -143,7 +161,11 @@ def train(seed, testset_ratio, validset_ratio, data_path, results_path, early_st
            # plotting
            if (i + 1) % plot_at == 0:
                print(f"Plotting images, current update {i + 1}")
-                plot(input.cpu().numpy(), target.detach().cpu().numpy(), output.detach().cpu().numpy(), plotpath, i)
+                # Convert to float32 for matplotlib compatibility
+                plot(input.float().cpu().numpy(), 
+                     target.detach().float().cpu().numpy(), 
+                     output.detach().float().cpu().numpy(), 
+                     plotpath, i)

            # evaluate the model every validate_at updates
            if (i + 1) % validate_at == 0: