diff --git a/ignite/engine/engine.py b/ignite/engine/engine.py
index a013fda29b1..f4333eee324 100644
--- a/ignite/engine/engine.py
+++ b/ignite/engine/engine.py
@@ -125,6 +125,24 @@ def compute_mean_std(engine, batch):
     _state_dict_all_req_keys = ("epoch_length", "max_epochs")
     _state_dict_one_of_opt_keys = ("iteration", "epoch")
 
+    class debug_mode(EventEnum):
+        DEBUG_NONE = 0
+        DEBUG_EVENTS = 1
+        DEBUG_OUTPUT = 2
+        DEBUG_GRADS = 4
+
+        def __iter__(self) -> Iterator:
+            # A single level iterates as a one-element combination.
+            return iter((self,))
+
+        def __int__(self) -> int:
+            return self.value
+
+    DEBUG_NONE = debug_mode.DEBUG_NONE
+    DEBUG_EVENTS = debug_mode.DEBUG_EVENTS
+    DEBUG_OUTPUT = debug_mode.DEBUG_OUTPUT
+    DEBUG_GRADS = debug_mode.DEBUG_GRADS
+
     # Flag to disable engine._internal_run as generator feature for BC
     interrupt_resume_enabled = True
 
@@ -143,6 +161,8 @@ def __init__(self, process_function: Callable[["Engine", Any], Any]):
         self._dataloader_iter: Optional[Iterator[Any]] = None
         self._init_iter: Optional[int] = None
 
+        self.debug_level = self.DEBUG_NONE
+
         self.register_events(*Events)
 
         if self._process_function is None:
@@ -425,6 +445,30 @@ def _fire_event(self, event_name: Any, *event_args: Any, **event_kwargs: Any) ->
             first, others = ((args[0],), args[1:]) if (args and args[0] == self) else ((), args)
             func(*first, *(event_args + others), **kwargs)
 
+    def debug(self, level: debug_mode = DEBUG_NONE, config: Optional[Dict[str, Any]] = None) -> None:
+        if isinstance(level, int):
+            raise ValueError(
+                f"Unknown debug level '{level}'. Level should be a combination of Engine.DEBUG_NONE, "
+                "Engine.DEBUG_EVENTS, Engine.DEBUG_OUTPUT, Engine.DEBUG_GRADS"
+            )
+        self.debug_level = level
+        if config is None:
+            # Fall back to the per-run config stored on the state.
+            config = self.state.debug_config
+
+        log = ""
+        for item in level:
+            if item == Engine.DEBUG_EVENTS:
+                log += f"{self.state.epoch} | {self.state.iteration}, Firing handlers for event {self.last_event_name} "
+            elif item == Engine.DEBUG_OUTPUT:
+                # Read the config lazily so DEBUG_EVENTS alone does not require optimizer/layer entries.
+                lr = config["optimizer"].param_groups[0]["lr"]
+                log += f"Loss : {self.state.output}, LR : {lr} "
+            elif item == Engine.DEBUG_GRADS:
+                log += f"Gradients : {config['layer'].weight.grad} "
+
+        self.logger.debug(log)
+
     def fire_event(self, event_name: Any) -> None:
         """Execute all the handlers associated with given event.
 
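Note on usage: a minimal sketch of how the debug API above would be wired into a run, assuming the patched Engine. The toy model, step function, and random data are illustrative only; the config keys "optimizer" and "layer" are the ones debug() actually reads.

import logging

import torch
from torch import nn
from torch.optim import SGD

from ignite.engine import Engine, Events

# engine.logger emits at DEBUG level, which is filtered out by default.
logging.basicConfig(level=logging.DEBUG)

model = nn.Linear(4, 2)
optimizer = SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

def train_step(engine, batch):
    x, y = batch
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(train_step)
debug_config = {"optimizer": optimizer, "layer": model}  # keys read by Engine.debug

@trainer.on(Events.ITERATION_COMPLETED)
def log_debug(engine):
    # Logs "Loss : ..., LR : ..." through engine.logger at each iteration.
    engine.debug(level=Engine.DEBUG_OUTPUT, config=debug_config)

data = [(torch.randn(8, 4), torch.randn(8, 2)) for _ in range(5)]
trainer.run(data, max_epochs=2)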
diff --git a/ignite/engine/events.py b/ignite/engine/events.py
index ef76babdc5b..4bf83ddbb69 100644
--- a/ignite/engine/events.py
+++ b/ignite/engine/events.py
@@ -445,6 +445,7 @@ def __init__(self, **kwargs: Any) -> None:
         self.batch: Optional[int] = None
         self.metrics: Dict[str, Any] = {}
         self.dataloader: Optional[Union[DataLoader, Iterable[Any]]] = None
+        self.debug_config: Dict[str, Any] = {}
         self.seed: Optional[int] = None
         self.times: Dict[str, Optional[float]] = {
             Events.EPOCH_COMPLETED.name: None,
diff --git a/tests/ignite/engine/test_engine.py b/tests/ignite/engine/test_engine.py
index 994eb49f72b..161a4ee5ab5 100644
--- a/tests/ignite/engine/test_engine.py
+++ b/tests/ignite/engine/test_engine.py
@@ -1388,3 +1388,79 @@ def check_iter_epoch():
     state = engine.run(data, max_epochs=max_epochs)
     assert state.iteration == max_epochs * len(data) and state.epoch == max_epochs
     assert num_calls_check_iter_epoch == 1
+
+
+def test_engine_debug():
+    import torch.nn.functional as F
+    from torch import nn
+    from torch.optim import SGD
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import Compose, ToTensor
+
+    from ignite.engine import create_supervised_trainer
+
+    class Net(nn.Module):
+        def __init__(self):
+            super(Net, self).__init__()
+            self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
+            self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
+            self.conv2_drop = nn.Dropout2d()
+            self.fc1 = nn.Linear(320, 50)
+            self.fc2 = nn.Linear(50, 10)
+
+        def forward(self, x):
+            x = F.relu(F.max_pool2d(self.conv1(x), 2))
+            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
+            x = x.view(-1, 320)
+            x = F.relu(self.fc1(x))
+            x = F.dropout(x, training=self.training)
+            x = self.fc2(x)
+            return F.log_softmax(x, dim=-1)
+
+    def _test():
+        train_loader = DataLoader(
+            MNIST(download=True, root=".", transform=Compose([ToTensor()]), train=True),
+            batch_size=64,
+            shuffle=True,
+        )
+
+        model = Net()
+        device = "cpu"
+        log_interval = 10
+        epochs = 10
+
+        if torch.cuda.is_available():
+            device = "cuda"
+
+        model.to(device)  # Move model before creating optimizer
+        optimizer = SGD(model.parameters(), lr=0.01, momentum=0.5)
+        criterion = nn.NLLLoss()
+        trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
+        debug_config = {"optimizer": optimizer, "layer": model.fc2}
+
+        # Attach the debug calls so they actually fire during the run.
+        @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
+        def log_training_debug_events(engine):
+            trainer.debug(level=Engine.DEBUG_EVENTS, config=debug_config)
+
+        @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
+        def log_training_debug_outputs(engine):
+            trainer.debug(level=Engine.DEBUG_OUTPUT, config=debug_config)
+
+        @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
+        def log_training_debug_grads(engine):
+            trainer.debug(level=Engine.DEBUG_GRADS, config=debug_config)
+
+        @trainer.on(Events.STARTED)
+        def log_training_debug_int(engine):
+            with pytest.raises(
+                ValueError,
+                match=r"Unknown debug level '2'. Level should be a combination of Engine.DEBUG_NONE, "
+                r"Engine.DEBUG_EVENTS, Engine.DEBUG_OUTPUT, Engine.DEBUG_GRADS",
+            ):
+                trainer.debug(level=2, config=debug_config)
+
+        trainer.run(train_loader, max_epochs=epochs)
+
+    _test()
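Design note: the 0/1/2/4 values for debug_mode read as bit flags, but a plain EventEnum cannot be OR-combined, so debug(level=...) only ever receives a single level and the custom __iter__ just wraps it. If combined levels such as DEBUG_OUTPUT | DEBUG_GRADS are the eventual goal, the standard-library enum.IntFlag gives that behavior natively. A sketch of that alternative (not part of this diff):

from enum import IntFlag

class DebugMode(IntFlag):
    DEBUG_NONE = 0
    DEBUG_EVENTS = 1
    DEBUG_OUTPUT = 2
    DEBUG_GRADS = 4

level = DebugMode.DEBUG_OUTPUT | DebugMode.DEBUG_GRADS
assert DebugMode.DEBUG_OUTPUT in level   # flag membership works out of the box
assert DebugMode.DEBUG_EVENTS not in level
assert int(level) == 6

On Python 3.11+ a combined flag also iterates over its set bits, so the `for item in level` loop in debug() would work unchanged with this class.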