Skip to content

Commit

Permalink
Updated for logger.
Browse files Browse the repository at this point in the history
  • Loading branch information
sraashis committed Dec 5, 2023
1 parent d909836 commit 49b7865
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 26 deletions.
4 changes: 4 additions & 0 deletions easytorch/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ def get_data_split(self):
elif p.suffix == '.txt':
with open(str(p)) as fw:
files = fw.read().splitlines()

elif p.is_file():
files = [self.data_source]

else:
raise ValueError(f"Unknown data source: {self.data_source}")

Expand Down
31 changes: 14 additions & 17 deletions easytorch/easytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,6 @@ def __init__(self, config_source=_conf.args_parser(), dataloader_args=None, **kw
self._ddp_setup()
self._make_reproducible()
self.conf.update(is_master=self.conf.get('is_master', True))
self.conf['RUN-ID'] = _dtime.now().strftime("ET-%Y-%m-%d-%H%M%S-") + _uuid.uuid4().hex[:8].upper()

self.conf['save_dir'] = self.conf['output_base_dir'] + _sep + (
self.conf['phase'].upper() + _sep + self.conf["name"]
)
Expand Down Expand Up @@ -207,16 +205,14 @@ def _run_training_and_eval(self, data_split, engine, dataset_cls):

engine.save_checkpoint(engine.conf['save_dir'] + _sep + engine.cache['latest_checkpoint'])

train_log = engine.conf['save_dir'] + _sep + ".train_log.npy"
val_log = engine.conf['save_dir'] + _sep + ".validation_log.npy"
train_log = engine.conf['save_dir'] + _sep + ".train_log.csv"
val_log = engine.conf['save_dir'] + _sep + ".validation_log.csv"

_np.save(train_log, _np.array(engine.cache[LogKey.TRAIN_LOG]))
_np.save(val_log, _np.array(engine.cache[LogKey.TRAIN_LOG]))
_np.savetxt(train_log, _np.array(engine.cache[LogKey.TRAIN_LOG]), delimiter=',', fmt='%.5f')
_np.savetxt(val_log, _np.array(engine.cache[LogKey.VALIDATION_LOG]), delimiter=',', fmt='%.5f')

engine.cache[LogKey.TRAIN_LOG] = train_log
engine.cache[LogKey.VALIDATION_LOG] = val_log
_utils.save_cache(self.conf, engine.cache, name=engine.conf['name'] + "_train")
engine.cache['_saved'] = True

def _run_test(self, data_split, engine, dataset_cls, distributed=False) -> dict:
test_dataset = engine.data_handle.get_dataset(Phase.TEST, data_split, dataset_cls)
Expand All @@ -233,18 +229,13 @@ def _run_test(self, data_split, engine, dataset_cls, distributed=False) -> dict:
""" Run and save experiment test scores """
engine.cache[
'output_csv_TEST'
] = f"{engine.conf['save_dir']}{_sep}TEST_results_{engine.conf['RUN-ID']}.csv"
] = f"{engine.conf['save_dir']}{_sep}test_results_{engine.conf['RUN-ID']}.csv"
with open(engine.cache[f'output_csv_TEST'], 'w') as rw:
test_out = engine.evaluation(dataloader=dataloader, mode=Phase.TEST,
save_predictions=True, results_writer=rw)

test_meter = engine.reduce_scores([test_out], distributed=False)
engine.cache[LogKey.TEST_METRICS] = [test_meter.get()]
_utils.save_scores(self.conf['save_dir'], engine.cache, name=engine.conf['name'],
file_keys=[LogKey.TEST_METRICS])

if not engine.cache.get('_saved'):
_utils.save_cache(self.conf, engine.cache, name=f"{engine.conf['name']}_test")
engine.cache[LogKey.TEST_METRICS] = f"{test_meter}"
return test_out

def _inference(self, data_split, engine, dataset_cls):
Expand All @@ -260,16 +251,16 @@ def _inference(self, data_split, engine, dataset_cls):

engine.cache[
'output_csv_INFERENCE'
] = f"{engine.conf['save_dir']}{_sep}INFERENCE_results_{engine.conf['RUN-ID']}.csv"
] = f"{engine.conf['save_dir']}{_sep}inference_results_{engine.conf['RUN-ID']}.csv"
with open(engine.cache[f'output_csv_INFERENCE'], 'w') as rw:
engine.inference(dataloader=dataloader, results_writer=rw)
_utils.save_cache(self.conf, engine.cache, name=f"{engine.conf['name']}_inference")

def run(self, runner_cls: typing.Type[ETRunner],
dataset_cls: typing.Type[ETDataset] = ETDataset,
data_handle_cls: typing.Type[ETDataHandle] = ETDataHandle):

if self.conf['is_master']:
"""To avoid problems if the mount is the same location for multiple nodes(usually the case"""
self._maybe_advance_run()
_os.makedirs(self.conf['save_dir'], exist_ok=self.conf['force'])

Expand All @@ -283,6 +274,7 @@ def run(self, runner_cls: typing.Type[ETRunner],
self._run(runner_cls, dataset_cls, data_handle_cls)

def _run(self, runner_cls, dataset_cls, data_handle_cls):
self.conf['RUN-ID'] = f"RUN{self.conf.get('world_rank', 0)}-" + _uuid.uuid4().hex[:8].upper()

engine = runner_cls(
conf=self.conf,
Expand All @@ -292,6 +284,9 @@ def _run(self, runner_cls, dataset_cls, data_handle_cls):
)
)

engine.cache['START-TIME'] = _dtime.now().strftime("%Y-%m-%d %H:%M:%S")
_utils.save_cache(self.conf, {}, name=f"{self.conf['name']}_{self.conf['phase']}".upper())

self._prepare_nn_engine(engine)

data_split = {}
Expand All @@ -307,3 +302,5 @@ def _run(self, runner_cls, dataset_cls, data_handle_cls):
if self.conf['phase'] == Phase.INFERENCE:
self._inference(data_split, engine, dataset_cls)
_cleanup(engine, engine.data_handle)
engine.cache['END-TIME'] = _dtime.now().strftime("%Y-%m-%d %H:%M:%S")
_utils.save_cache(self.conf, engine.cache, name=f"{engine.conf['name']}_{self.conf['phase']}".upper())
2 changes: 1 addition & 1 deletion easytorch/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def _update_scores(_out, _it, _meter):
_update_scores(None, it, meter)

if self.conf['verbose'] and lazy_debug(i, add=epoch):
info(f" Itr:{i}/{len(dataloader)}, {it['meter']}")
info(f"  Itr:{i}/{len(dataloader)}, {meter}")  # Cumulative score

if self.conf['verbose']:
info(f" {mode}, {meter}")
Expand Down
4 changes: 2 additions & 2 deletions easytorch/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ def clean_recursive(obj):

def save_cache(conf, cache, name=''):
_cache = {**cache, 'conf': conf}
with open(conf['save_dir'] + _os.sep + f"{name}_log.json", 'w') as fp:
with open(conf['save_dir'] + _os.sep + f"{name}.json", 'w') as fp:
try:
log = _copy.deepcopy(_cache)
clean_recursive(log)
_json.dump(log, fp)
except Exception as e:
with open(conf['save_dir'] + _os.sep + f"{name}_log.txt", 'w') as raw:
with open(conf['save_dir'] + _os.sep + f"{name}.txt", 'w') as raw:
raw.write(f"{e}\n")
raw.write(f"{_cache}")
4 changes: 3 additions & 1 deletion easytorch/vision/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ def plot_progress(save_dir, cache, name='', plot_keys=[], num_points=31, epoch=N
r"""
Custom plot to plot data from the cache by keys.
"""
save_to = save_dir + _os.sep + "_plots"
_os.makedirs(save_to, exist_ok=True)
for k in plot_keys:
D = _np.array(cache.get(k, []))
if len(D) == 0 or cache.get('log_header') is None:
Expand Down Expand Up @@ -57,6 +59,6 @@ def plot_progress(save_dir, cache, name='', plot_keys=[], num_points=31, epoch=N
ax.set_xticklabels(xticks_range)

_plt.xlabel('Epochs')
_plt.savefig(save_dir + _os.sep + f"{name}_{k}_{plot_id}.png", bbox_inches='tight')
_plt.savefig(save_to + _os.sep + f"{name}_{k}_{plot_id}.png", bbox_inches='tight')
_plt.close('all')
i = j
2 changes: 1 addition & 1 deletion examples/MNIST_easytorch_CNN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
" def new_meter(self):\n",
" return ETMeter(\n",
" num_averages=2, # Since we are tracing two losses\n",
" cmf=ConfusionMatrix(num_classes=10),\n",
" cmf=ConfusionMatrix(num_classes=10, device=self.device['gpu']),\n",
" auc=AUCROCMetrics()\n",
" )"
]
Expand Down
7 changes: 4 additions & 3 deletions examples/MNIST_easytorch_CNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def init_cache(self):
def new_meter(self):
return ETMeter(
num_averages=2, # Since we are tracing two losses
cmf=ConfusionMatrix(num_classes=10),
auc=AUCROCMetrics()
cmf=ConfusionMatrix(num_classes=10, device=self.device['gpu']),
auc=AUCROCMetrics(),
)


Expand All @@ -53,7 +53,8 @@ def new_meter(self):
transform=transform)

dataloader_args = {'train': {'dataset': train_dataset},
'validation': {'dataset': val_dataset}}
'validation': {'dataset': val_dataset},
'test': {'dataset': val_dataset}}
runner = EasyTorch(phase='train', distributed_validation=True,
batch_size=512, epochs=21,
dataloader_args=dataloader_args,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# This call to setup() does all the work
setup(
name="easytorch",
version="3.8.2",
version="3.8.3",
description="Easy Neural Network Experiments with pytorch",
long_description=_README,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 49b7865

Please sign in to comment.