#!/usr/bin/env python3
# scripts/train.py
"""
Training script for the basil vs tomato classifier.

Structure:
- load the datasets from data/basil_tomato/train and /val
- transfer learning with EfficientNet-B0
- save the best model to models/basil_tomato_classifier.pth

All heavy work (dataset scan, model download, training) lives under the
``if __name__ == "__main__":`` guard. ``DataLoader`` worker processes
re-import this module on spawn-based platforms (Windows, macOS); without
the guard every worker would re-run the whole script.
"""

import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

# 1) Dataset paths
train_dir = "data/basil_tomato/train"
val_dir = "data/basil_tomato/val"

# Training hyper-parameters
BATCH_SIZE = 32
NUM_WORKERS = 4
NUM_EPOCHS = 10

# Channel statistics passed to Normalize for both splits.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# 2) Data transformations
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])


def _load_pretrained_backbone():
    """Return an EfficientNet-B0 initialised with ImageNet weights.

    Uses the ``weights=`` API when the installed torchvision provides it
    (``pretrained=`` is deprecated there) and falls back to the legacy
    ``pretrained=True`` flag otherwise.
    """
    weights_enum = getattr(models, "EfficientNet_B0_Weights", None)
    if weights_enum is not None:
        # IMAGENET1K_V1 is what the legacy ``pretrained=True`` flag loaded.
        return models.efficientnet_b0(weights=weights_enum.IMAGENET1K_V1)
    return models.efficientnet_b0(pretrained=True)


def _check_class_mapping(reference_ds, other_ds):
    """Raise ``ValueError`` if ``other_ds`` labels disagree with ``reference_ds``.

    ``ImageFolder`` builds ``class_to_idx`` independently for every root
    directory, so a missing or extra class folder can shift the indices and
    make validation labels silently meaningless.
    """
    reference = reference_ds.class_to_idx
    for class_name, index in other_ds.class_to_idx.items():
        if reference.get(class_name) != index:
            raise ValueError(
                "Mappatura classi incoerente tra train e validation: "
                f"train={reference}, val={other_ds.class_to_idx}"
            )


# 7) Training and validation functions
def train_epoch():
    """Run one optimisation pass over ``train_loader``.

    Reads the module-level ``model``, ``train_loader``, ``train_ds``,
    ``criterion``, ``optimizer`` and ``device`` created in the ``__main__``
    section below.

    Returns:
        Tuple ``(loss, accuracy)`` averaged over ``len(train_ds)`` samples.
    """
    model.train()
    running_loss, running_corrects = 0.0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The criterion returns a per-batch mean; weight it by the batch
        # size so a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()
    epoch_loss = running_loss / len(train_ds)
    epoch_acc = running_corrects / len(train_ds)
    return epoch_loss, epoch_acc


def validate_epoch():
    """Evaluate the model on ``val_loader`` without computing gradients.

    Reads the module-level ``model``, ``val_loader``, ``val_ds``,
    ``criterion`` and ``device`` created in the ``__main__`` section below.

    Returns:
        Tuple ``(loss, accuracy)`` averaged over ``len(val_ds)`` samples.
    """
    model.eval()
    val_loss, val_corrects = 0.0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            val_corrects += (outputs.argmax(1) == labels).sum().item()
    loss = val_loss / len(val_ds)
    acc = val_corrects / len(val_ds)
    return loss, acc


if __name__ == "__main__":
    # 3) Create datasets and DataLoaders
    train_ds = datasets.ImageFolder(train_dir, transform=train_transforms)
    val_ds = datasets.ImageFolder(val_dir, transform=val_transforms)
    _check_class_mapping(train_ds, val_ds)

    train_loader = DataLoader(
        train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
    )
    val_loader = DataLoader(
        val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
    )

    print(f"Classi trovate: {train_ds.classes}")
    print(f"Numero immagini train: {len(train_ds)}, validation: {len(val_ds)}")

    # 4) Configure device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 5) Build the model: swap the final linear layer for one sized to our classes
    model = _load_pretrained_backbone()
    num_classes = len(train_ds.classes)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    model = model.to(device)

    # 6) Define criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

    # 8) Main training loop
    best_val_acc = 0.0
    os.makedirs("models", exist_ok=True)
    save_path = os.path.join("models", "basil_tomato_classifier.pth")

    for epoch in range(1, NUM_EPOCHS + 1):
        train_loss, train_acc = train_epoch()
        val_loss, val_acc = validate_epoch()

        print(f"Epoca {epoch}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f} | "
              f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

        # Save the best model seen so far (by validation accuracy)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), save_path)
            print(f"--> Nuovo best model salvato con val_acc={val_acc:.4f}")

    print(f"Training completato. Best val_acc: {best_val_acc:.4f}")