diff --git a/examples/train_horovod/resnet50/README.md b/examples/train_horovod/resnet50/README.md
deleted file mode 100644
index 5fd7f1f88..000000000
--- a/examples/train_horovod/resnet50/README.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# Distributed Training Example with Intel® Optimization for Horovod*
-
-## Model Information
-
-| Use Case | Framework | Model Repo | Branch/Commit/Tag | Optional Patch |
-| :---: | :---: | :---: | :---: | :---: |
-| Training | TensorFlow | [Tensorflow-Models](https://github.com/tensorflow/models) | v2.8.0 | itex.yaml<br>itex_dummy.yaml<br>hvd_support_light.patch<br>or hvd_support.patch |
-
-
-
-## Dependency
-- [Tensorflow](https://pypi.org/project/tensorflow/)
-- [Intel® Extension for TensorFlow*](https://pypi.org/project/intel-extension-for-tensorflow/)
-- [Intel® Optimization for Horovod*](https://pypi.org/project/intel-optimization-for-Horovod/)
-- Others, installed as shown below:
-```
-pip install gin gin-config tensorflow-addons tensorflow-model-optimization tensorflow-datasets
-```
-
-## Model examples preparation
-
-### Model Repo
-```
-WORKSPACE=xxxx # set your workspace folder
-cd $WORKSPACE
-git clone -b v2.8.0 https://github.com/tensorflow/models.git tensorflow-models
-cd tensorflow-models
-git apply path/to/hvd_support_light.patch # or path/to/hvd_support.patch
-```
-**hvd_support_light.patch** contains the minimal set of changes.
-- hvd.init(): initializes Horovod, including resource allocation.
-- tf.config.experimental.set_memory_growth(): when memory growth is enabled, runtime initialization does not allocate all memory on the device.
-- tf.config.experimental.set_visible_devices(): sets the list of visible devices.
-- strategy_scope: removes the native distributed strategy.
-- hvd.DistributedOptimizer(): uses the Horovod distributed optimizer.
-- dataset.shard(): multiple workers run the same code but on different data; the dataset is split equally among workers by rank index.
-
-**hvd_support.patch** additionally adds the LARS optimizer ([paper](https://arxiv.org/abs/1708.03888)).
-
-### Download Dataset
-Download the ImageNet dataset from https://image-net.org/download-images.php
-
-
-**Note:** For non-commercial research and/or educational purposes only.
-
-
-
-## Execution
-### Set Model Parameters
-Export these parameters in your script or environment.
-```
-export PYTHONPATH=${WORKSPACE}/tensorflow-models
-MODEL_DIR=${WORKSPACE}/output
-DATA_DIR=${WORKSPACE}/imagenet_data/imagenet
-
-CONFIG_FILE=path/to/itex.yaml
-NUMBER_OF_PROCESS=2
-PROCESS_PER_NODE=2
-```
-- Download `itex.yaml` or `itex_dummy.yaml` and set one of them as CONFIG_FILE; the model then runs with `real data` or `dummy data` accordingly. The default is itex.yaml.
-- Set `NUMBER_OF_PROCESS` and `PROCESS_PER_NODE` according to the number of Horovod ranks you need. The default is a 2-rank task.
-### HVD command
-
-```
-if [ ! -d "$MODEL_DIR" ]; then
- mkdir -p $MODEL_DIR
-else
- rm -rf $MODEL_DIR && mkdir -p $MODEL_DIR
-fi
-
-mpirun -np $NUMBER_OF_PROCESS -ppn $PROCESS_PER_NODE --prepend-rank \
-python ${PYTHONPATH}/official/vision/image_classification/classifier_trainer.py \
---mode=train_and_eval \
---model_type=resnet \
---dataset=imagenet \
---model_dir=$MODEL_DIR \
---data_dir=$DATA_DIR \
---config_file=$CONFIG_FILE
-```
-
-
-
-## Output
-### Performance Data
-```
-[1] I0909 03:33:23.323099 140645511436096 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 0 and 100
-[0] I0909 03:33:23.324534 140611700504384 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 0 and 100
-[0] I0909 03:33:43.037004 140611700504384 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 100 and 200
-[1] I0909 03:33:43.037142 140645511436096 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 100 and 200
-[1] I0909 03:34:03.213994 140645511436096 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 200 and 300
-[0] I0909 03:34:03.214127 140611700504384 keras_utils.py:145] TimeHistory: xxxx seconds, xxxx examples/second between steps 200 and 300
-```
diff --git a/examples/train_horovod/resnet50/hvd_support_light.patch b/examples/train_horovod/resnet50/hvd_support_light.patch
deleted file mode 100644
index 6cd993952..000000000
--- a/examples/train_horovod/resnet50/hvd_support_light.patch
+++ /dev/null
@@ -1,233 +0,0 @@
-diff --git a/official/vision/image_classification/classifier_trainer.py b/official/vision/image_classification/classifier_trainer.py
-index ab6fbaea9..f0f2cb2c5 100644
---- a/official/vision/image_classification/classifier_trainer.py
-+++ b/official/vision/image_classification/classifier_trainer.py
-@@ -37,6 +37,14 @@ from official.vision.image_classification.efficientnet import efficientnet_model
- from official.vision.image_classification.resnet import common
- from official.vision.image_classification.resnet import resnet_model
-
-+global is_mpi
-+try:
-+ import horovod.tensorflow.keras as hvd
-+ hvd.init()
-+ is_mpi = hvd.size()
-+except ImportError:
-+ is_mpi = 0
-+ print("No MPI horovod support, this is running in no-MPI mode!")
-
- def get_models() -> Mapping[str, tf.keras.Model]:
- """Returns the mapping from model type name to Keras model."""
-@@ -289,6 +297,12 @@ def train_and_eval(
- """Runs the train and eval path using compile/fit."""
- logging.info('Running train and eval.')
-
-+ if is_mpi:
-+ gpus = tf.config.experimental.list_physical_devices('XPU')
-+ for gpu in gpus:
-+ tf.config.experimental.set_memory_growth(gpu, True)
-+ tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'XPU')
-+
- distribute_utils.configure_cluster(params.runtime.worker_hosts,
- params.runtime.task_index)
-
-@@ -299,7 +313,7 @@ def train_and_eval(
- num_gpus=params.runtime.num_gpus,
- tpu_address=params.runtime.tpu)
-
-- strategy_scope = distribute_utils.get_strategy_scope(strategy)
-+ #strategy_scope = distribute_utils.get_strategy_scope(strategy)
-
- logging.info('Detected %d devices.',
- strategy.num_replicas_in_sync if strategy else 1)
-@@ -324,56 +338,74 @@ def train_and_eval(
-
- logging.info('Global batch size: %d', train_builder.global_batch_size)
-
-- with strategy_scope:
-- model_params = params.model.model_params.as_dict()
-- model = get_models()[params.model.name](**model_params)
-- learning_rate = optimizer_factory.build_learning_rate(
-- params=params.model.learning_rate,
-- batch_size=train_builder.global_batch_size,
-- train_epochs=train_epochs,
-- train_steps=train_steps)
-- optimizer = optimizer_factory.build_optimizer(
-- optimizer_name=params.model.optimizer.name,
-- base_learning_rate=learning_rate,
-- params=params.model.optimizer.as_dict(),
-- model=model)
-- optimizer = performance.configure_optimizer(
-- optimizer,
-- use_float16=train_builder.dtype == 'float16',
-- loss_scale=get_loss_scale(params))
--
-- metrics_map = _get_metrics(one_hot)
-- metrics = [metrics_map[metric] for metric in params.train.metrics]
-- steps_per_loop = train_steps if params.train.set_epoch_loop else 1
--
-- if one_hot:
-- loss_obj = tf.keras.losses.CategoricalCrossentropy(
-- label_smoothing=params.model.loss.label_smoothing)
-- else:
-- loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
-- model.compile(
-- optimizer=optimizer,
-- loss=loss_obj,
-- metrics=metrics,
-- steps_per_execution=steps_per_loop)
--
-- initial_epoch = 0
-- if params.train.resume_checkpoint:
-- initial_epoch = resume_from_checkpoint(
-- model=model, model_dir=params.model_dir, train_steps=train_steps)
-+ model_params = params.model.model_params.as_dict()
-+ model = get_models()[params.model.name](**model_params)
-+ learning_rate = optimizer_factory.build_learning_rate(
-+ params=params.model.learning_rate,
-+ batch_size=train_builder.global_batch_size * hvd.size(),
-+ train_epochs=train_epochs,
-+ train_steps=train_steps)
-+ optimizer = optimizer_factory.build_optimizer(
-+ optimizer_name=params.model.optimizer.name,
-+ base_learning_rate=learning_rate,
-+ params=params.model.optimizer.as_dict(),
-+ model=model)
-+ optimizer = performance.configure_optimizer(
-+ optimizer,
-+ use_float16=train_builder.dtype == 'float16',
-+ loss_scale=get_loss_scale(params))
-+
-+ metrics_map = _get_metrics(one_hot)
-+ metrics = [metrics_map[metric] for metric in params.train.metrics]
-+ steps_per_loop = train_steps if params.train.set_epoch_loop else 1
-
-- callbacks = custom_callbacks.get_callbacks(
-- model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
-- include_tensorboard=params.train.callbacks.enable_tensorboard,
-- time_history=params.train.callbacks.enable_time_history,
-- track_lr=params.train.tensorboard.track_lr,
-- write_model_weights=params.train.tensorboard.write_model_weights,
-- initial_step=initial_epoch * train_steps,
-- batch_size=train_builder.global_batch_size,
-- log_steps=params.train.time_history.log_steps,
-- model_dir=params.model_dir,
-- backup_and_restore=params.train.callbacks.enable_backup_and_restore)
-+ if one_hot:
-+ loss_obj = tf.keras.losses.CategoricalCrossentropy(
-+ label_smoothing=params.model.loss.label_smoothing)
-+ else:
-+ loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
-
-+ hvd_optimizer = hvd.DistributedOptimizer(optimizer, num_groups=1)
-+ model.compile(
-+ optimizer=hvd_optimizer,
-+ loss=loss_obj,
-+ metrics=metrics,
-+ steps_per_execution=steps_per_loop)
-+
-+ initial_epoch = 0
-+ if params.train.resume_checkpoint:
-+ initial_epoch = resume_from_checkpoint(
-+ model=model, model_dir=params.model_dir, train_steps=train_steps)
-+
-+ # Add broadcast callback for rank0
-+ callbacks = []
-+
-+ if hvd.local_rank() == 0:
-+ callbacks = custom_callbacks.get_callbacks(
-+ model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
-+ include_tensorboard=params.train.callbacks.enable_tensorboard,
-+ time_history=params.train.callbacks.enable_time_history,
-+ track_lr=params.train.tensorboard.track_lr,
-+ write_model_weights=params.train.tensorboard.write_model_weights,
-+ initial_step=initial_epoch * train_steps,
-+ batch_size=train_builder.global_batch_size,
-+ log_steps=params.train.time_history.log_steps,
-+ model_dir=params.model_dir,
-+ backup_and_restore=params.train.callbacks.enable_backup_and_restore)
-+ else:
-+ callbacks = custom_callbacks.get_callbacks(
-+ model_checkpoint=False,
-+ include_tensorboard=params.train.callbacks.enable_tensorboard,
-+ time_history=params.train.callbacks.enable_time_history,
-+ track_lr=params.train.tensorboard.track_lr,
-+ write_model_weights=params.train.tensorboard.write_model_weights,
-+ initial_step=initial_epoch * train_steps,
-+ batch_size=train_builder.global_batch_size,
-+ log_steps=params.train.time_history.log_steps,
-+ model_dir=params.model_dir,
-+ backup_and_restore=params.train.callbacks.enable_backup_and_restore)
-+
-+ callbacks.append(hvd.callbacks.BroadcastGlobalVariablesCallback(0))
- serialize_config(params=params, model_dir=params.model_dir)
-
- if params.evaluation.skip_eval:
-diff --git a/official/vision/image_classification/dataset_factory.py b/official/vision/image_classification/dataset_factory.py
-index a0458eccc..e6dfb39f6 100644
---- a/official/vision/image_classification/dataset_factory.py
-+++ b/official/vision/image_classification/dataset_factory.py
-@@ -29,6 +29,7 @@ import tensorflow_datasets as tfds
- from official.modeling.hyperparams import base_config
- from official.vision.image_classification import augment
- from official.vision.image_classification import preprocessing
-+import horovod.tensorflow as hvd
-
- AUGMENTERS = {
- 'autoaugment': augment.AutoAugment,
-@@ -207,7 +208,7 @@ class DatasetBuilder:
- def num_steps(self) -> int:
- """The number of steps (batches) to exhaust this dataset."""
- # Always divide by the global batch size to get the correct # of steps
-- return self.num_examples // self.global_batch_size
-+ return self.num_examples // (self.global_batch_size * hvd.size())
-
- @property
- def dtype(self) -> tf.dtypes.DType:
-@@ -403,14 +404,10 @@ class DatasetBuilder:
- Returns:
- A TensorFlow dataset outputting batched images and labels.
- """
-- if (self.config.builder != 'tfds' and self.input_context and
-- self.input_context.num_input_pipelines > 1):
-- dataset = dataset.shard(self.input_context.num_input_pipelines,
-- self.input_context.input_pipeline_id)
-+ if self.is_training:
-+ dataset = dataset.shard(hvd.size(), hvd.rank())
- logging.info(
-- 'Sharding the dataset: input_pipeline_id=%d '
-- 'num_input_pipelines=%d', self.input_context.num_input_pipelines,
-- self.input_context.input_pipeline_id)
-+ 'Sharding the dataset: total size: %d ', hvd.size(), " local rank: %d ", hvd.rank())
-
- if self.is_training and self.config.builder == 'records':
- # Shuffle the input files.
-diff --git a/official/vision/image_classification/learning_rate.py b/official/vision/image_classification/learning_rate.py
-index 72f7e9518..e7edd90a2 100644
---- a/official/vision/image_classification/learning_rate.py
-+++ b/official/vision/image_classification/learning_rate.py
-@@ -22,10 +22,12 @@ from typing import Any, Mapping, Optional
-
- import numpy as np
- import tensorflow as tf
-+from tensorflow.python.util.tf_export import keras_export
-
- BASE_LEARNING_RATE = 0.1
-
-
-+@tf.keras.utils.register_keras_serializable(package='Custom', name='WarmupDeacySchedule')
- class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
- """A wrapper for LearningRateSchedule that includes warmup steps."""
-
-@@ -66,10 +68,11 @@ class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
- return lr
-
- def get_config(self) -> Mapping[str, Any]:
-- config = self._lr_schedule.get_config()
-+ config = {}
- config.update({
- "warmup_steps": self._warmup_steps,
- "warmup_lr": self._warmup_lr,
-+ "lr_schedule": self._lr_schedule,
- })
- return config
-
diff --git a/examples/train_resnet50/README.md b/examples/train_resnet50/README.md
new file mode 100644
index 000000000..42efac776
--- /dev/null
+++ b/examples/train_resnet50/README.md
@@ -0,0 +1,130 @@
+# ResNet50 Training on Intel GPU
+
+## Introduction
+
+Intel® Extension for TensorFlow* is compatible with stock TensorFlow*.
+This example shows ResNet50 training.
+
+## Hardware Requirements
+
+Verified Hardware Platforms:
+ - Intel® Data Center GPU Max Series
+
+## Prerequisites
+
+### Model Code Change
+The bf16 optimizations live in `resnet50.patch`; Horovod and LARS support live in `hvd_support.patch`. Clone the model repo first, then apply the appropriate patch as described in [Apply Patch](#apply-patch) below:
+```
+git clone -b v2.8.0 https://github.com/tensorflow/models.git tensorflow-models
+```
+
+### Prepare for GPU (Skip this step for CPU)
+
+Refer to [Prepare](../common_guide_running.md#prepare)
+
+### Setup Running Environment
+
+* Setup for GPU
+```bash
+./pip_set_env.sh
+```
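+
+The TensorFlow-Models reference code also needs a few extra pip packages. This list is carried over from the dependency section of the previous Horovod example and may need adjusting for your setup; install them inside the activated virtual environment:
+
+```bash
+# Extra dependencies used by the tensorflow/models v2.8.0 reference code
+pip install gin gin-config tensorflow-addons tensorflow-model-optimization tensorflow-datasets
+```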
+
+### Enable Running Environment
+
+Enable the oneAPI running environment (GPU only) and the virtual running environment.
+
+ * For GPU, refer to [Running](../common_guide_running.md#running)
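+
+In practice this usually means sourcing the oneAPI environment script and the virtual environment created above; a minimal sketch, assuming the default oneAPI install location:
+
+```bash
+source /opt/intel/oneapi/setvars.sh  # oneAPI runtime (GPU only)
+source ./env_itex/bin/activate       # virtual env created by pip_set_env.sh
+```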
+
+### Apply Patch
+
+#### Without Horovod
+```
+git apply path/to/configure/resnet50.patch
+```
+
+#### With Horovod
+```
+git apply path/to/hvd_configure/hvd_support.patch
+```
+#### Prepare ImageNet dataset
+**Using TFDS**
+
+`classifier_trainer.py` supports ImageNet with [TensorFlow Datasets (TFDS)](https://www.tensorflow.org/datasets/overview).
+
+Please see the following [example snippet](https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/scripts/download_and_prepare.py) for more information on how to use TFDS to download and prepare datasets, and specifically the [TFDS ImageNet readme](https://github.com/tensorflow/datasets/blob/master/docs/catalog/imagenet2012.md) for manual download instructions.
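+
+A minimal sketch of preparing `imagenet2012` with TFDS, assuming the ImageNet tar files were already downloaded manually (all paths below are placeholders):
+
+```bash
+# ILSVRC2012_img_train.tar and ILSVRC2012_img_val.tar must already sit in --manual_dir
+python -m tensorflow_datasets.scripts.download_and_prepare \
+  --datasets=imagenet2012 \
+  --data_dir=/the/path/to/tfds_data \
+  --manual_dir=/the/path/to/tfds_data/downloads/manual
+```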
+
+**Legacy TFRecords**
+Download the ImageNet dataset and convert it to TFRecord format. The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py) and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy) provide a few options.
+
+Note that the legacy ResNet runners, e.g. [resnet/resnet_ctl_imagenet_main.py](https://github.com/tensorflow/models/blob/v2.8.0/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py), require TFRecords, whereas `classifier_trainer.py` can use either format by setting the builder to 'records' or 'tfds' in the configuration files.
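+
+For the TFRecord route, a hedged example of converting a local ImageNet copy without uploading to GCS (flag names follow the linked `imagenet_to_gcs.py` script; paths are placeholders):
+
+```bash
+python imagenet_to_gcs.py \
+  --raw_data_dir=/the/path/to/imagenet_raw \
+  --local_scratch_dir=/the/path/to/imagenet_tfrecords \
+  --nogcs_upload
+```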
+
+## Execution
+### Set Model Parameters
+There are several config YAML files in the `configure` and `hvd_configure` folders. Set one of them as `CONFIG_FILE`; the model then runs with `real data` or `dummy data` accordingly. For single-tile training, use a YAML file from the `configure` folder. For distributed training, use a YAML file from the `hvd_configure` folder: `itex_bf16_lars.yaml`/`itex_fp32_lars.yaml` for Horovod with real data, and `itex_dummy_bf16_lars.yaml`/`itex_dummy_fp32_lars.yaml` for Horovod with dummy data.
+Export these parameters in your script or environment.
+```
+export PYTHONPATH=/the/path/to/tensorflow-models
+MODEL_DIR=/the/path/to/output
+DATA_DIR=/the/path/to/imagenet
+CONFIG_FILE=path/to/itex_xx.yaml # itex_bf16.yaml/itex_fp32.yaml for accuracy, itex_dummy_bf16.yaml/itex_dummy_fp32.yaml for benchmark
+
+```
+
+### Command
+
+```
+if [ ! -d "$MODEL_DIR" ]; then
+ mkdir -p $MODEL_DIR
+else
+ rm -rf $MODEL_DIR && mkdir -p $MODEL_DIR
+fi
+
+python ${PYTHONPATH}/official/vision/image_classification/classifier_trainer.py \
+--mode=train_and_eval \
+--model_type=resnet \
+--dataset=imagenet \
+--model_dir=$MODEL_DIR \
+--data_dir=$DATA_DIR \
+--config_file=$CONFIG_FILE
+```
+
+### Command with Horovod
+Set `NUMBER_OF_PROCESS` and `PROCESS_PER_NODE` according to the number of Horovod ranks you need. The default below is a 2-rank task.
+
+```
+if [ ! -d "$MODEL_DIR" ]; then
+ mkdir -p $MODEL_DIR
+else
+ rm -rf $MODEL_DIR && mkdir -p $MODEL_DIR
+fi
+
+NUMBER_OF_PROCESS=2
+PROCESS_PER_NODE=2
+
+mpirun -np $NUMBER_OF_PROCESS -ppn $PROCESS_PER_NODE --prepend-rank \
+python ${PYTHONPATH}/official/vision/image_classification/classifier_trainer.py \
+--mode=train_and_eval \
+--model_type=resnet \
+--dataset=imagenet \
+--model_dir=$MODEL_DIR \
+--data_dir=$DATA_DIR \
+--config_file=$CONFIG_FILE
+```
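+
+For multi-node runs, the same launch line can be pointed at a host file. A sketch using Intel MPI's hydra syntax (adjust `-f hostfile` and the rank counts for your cluster):
+
+```bash
+# 4 ranks over the nodes listed in ./hostfile, 2 ranks per node
+mpirun -np 4 -ppn 2 -f hostfile --prepend-rank \
+python ${PYTHONPATH}/official/vision/image_classification/classifier_trainer.py \
+--mode=train_and_eval --model_type=resnet --dataset=imagenet \
+--model_dir=$MODEL_DIR --data_dir=$DATA_DIR --config_file=$CONFIG_FILE
+```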
+
+## Example Output without Horovod
+```
+I0203 02:48:01.006297 139660941027136 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 1900 and 2000
+I0203 02:48:16.590331 139660941027136 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 2000 and 2100
+I0203 02:48:32.178206 139660941027136 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 2100 and 2200
+I0203 02:48:47.790128 139660941027136 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 2200 and 2300
+I0203 02:49:03.408512 139660941027136 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 2300 and 2400
+```
+## Example Output with Horovod
+```
+[0] I0817 00:09:07.602742 139898862851904 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 400 and 600
+[1] I0817 00:09:07.603262 140612319840064 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 400 and 600
+[0] I0817 00:10:07.917546 139898862851904 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 600 and 800
+[1] I0817 00:10:07.917738 140612319840064 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 600 and 800
+[0] I0817 00:11:08.277716 139898862851904 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 800 and 1000
+[1] I0817 00:11:08.277811 140612319840064 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 800 and 1000
+[0] I0817 00:12:08.555174 139898862851904 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 1000 and 1200
+[1] I0817 00:12:08.555221 140612319840064 keras_utils.py:145] TimeHistory: xx seconds, xxxx examples/second between steps 1000 and 1200
\ No newline at end of file
diff --git a/examples/train_horovod/resnet50/itex.yaml b/examples/train_resnet50/configure/itex_bf16.yaml
similarity index 100%
rename from examples/train_horovod/resnet50/itex.yaml
rename to examples/train_resnet50/configure/itex_bf16.yaml
diff --git a/examples/train_horovod/resnet50/itex_dummy.yaml b/examples/train_resnet50/configure/itex_dummy_bf16.yaml
similarity index 98%
rename from examples/train_horovod/resnet50/itex_dummy.yaml
rename to examples/train_resnet50/configure/itex_dummy_bf16.yaml
index d826f5a88..f8c678026 100644
--- a/examples/train_horovod/resnet50/itex_dummy.yaml
+++ b/examples/train_resnet50/configure/itex_dummy_bf16.yaml
@@ -50,6 +50,6 @@ train:
callbacks:
enable_checkpoint_and_export: True
resume_checkpoint: True
- epochs: 90
+ epochs: 1
evaluation:
epochs_between_evals: 1
diff --git a/examples/train_resnet50/configure/itex_dummy_fp32.yaml b/examples/train_resnet50/configure/itex_dummy_fp32.yaml
new file mode 100644
index 000000000..d360c41c9
--- /dev/null
+++ b/examples/train_resnet50/configure/itex_dummy_fp32.yaml
@@ -0,0 +1,55 @@
+# Training configuration for ResNet trained on ImageNet on TPUs.
+# Takes ~4 minutes, 30 seconds per epoch for a v3-32.
+# Reaches > 76.1% within 90 epochs.
+# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
+runtime:
+ distribution_strategy: 'mirrored'
+ num_gpus: 1
+train_dataset:
+ name: 'imagenet2012'
+ data_dir: None
+ builder: 'synthetic'
+ split: 'train'
+ one_hot: False
+ image_size: 224
+ num_classes: 1000
+ num_examples: 1281167
+ batch_size: 256
+ use_per_replica_batch_size: True
+ mean_subtract: False
+ standardize: False
+ dtype: 'float32'
+validation_dataset:
+ name: 'imagenet2012'
+ data_dir: None
+ builder: 'synthetic'
+ split: 'validation'
+ one_hot: False
+ image_size: 224
+ num_classes: 1000
+ num_examples: 50000
+ batch_size: 256
+ use_per_replica_batch_size: True
+ mean_subtract: False
+ standardize: False
+ dtype: 'float32'
+model:
+ name: 'resnet'
+ model_params:
+ rescale_inputs: True
+ optimizer:
+ name: 'momentum'
+ momentum: 0.9
+ decay: 0.9
+ epsilon: 0.001
+ moving_average_decay: 0.
+ lookahead: False
+ loss:
+ label_smoothing: 0.1
+train:
+ callbacks:
+ enable_checkpoint_and_export: True
+ resume_checkpoint: True
+ epochs: 1
+evaluation:
+ epochs_between_evals: 1
diff --git a/examples/train_resnet50/configure/itex_fp32.yaml b/examples/train_resnet50/configure/itex_fp32.yaml
new file mode 100644
index 000000000..27c0b707d
--- /dev/null
+++ b/examples/train_resnet50/configure/itex_fp32.yaml
@@ -0,0 +1,55 @@
+# Training configuration for ResNet trained on ImageNet on TPUs.
+# Takes ~4 minutes, 30 seconds per epoch for a v3-32.
+# Reaches > 76.1% within 90 epochs.
+# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
+runtime:
+ distribution_strategy: 'mirrored'
+ num_gpus: 1
+train_dataset:
+ name: 'imagenet2012'
+ data_dir: None
+ builder: 'records'
+ split: 'train'
+ one_hot: False
+ image_size: 224
+ num_classes: 1000
+ num_examples: 1281167
+ batch_size: 256
+ use_per_replica_batch_size: True
+ mean_subtract: False
+ standardize: False
+ dtype: 'float32'
+validation_dataset:
+ name: 'imagenet2012'
+ data_dir: None
+ builder: 'records'
+ split: 'validation'
+ one_hot: False
+ image_size: 224
+ num_classes: 1000
+ num_examples: 50000
+ batch_size: 256
+ use_per_replica_batch_size: True
+ mean_subtract: False
+ standardize: False
+ dtype: 'float32'
+model:
+ name: 'resnet'
+ model_params:
+ rescale_inputs: True
+ optimizer:
+ name: 'momentum'
+ momentum: 0.9
+ decay: 0.9
+ epsilon: 0.001
+ moving_average_decay: 0.
+ lookahead: False
+ loss:
+ label_smoothing: 0.1
+train:
+ callbacks:
+ enable_checkpoint_and_export: True
+ resume_checkpoint: True
+ epochs: 90
+evaluation:
+ epochs_between_evals: 1
diff --git a/examples/train_resnet50/configure/resnet50.patch b/examples/train_resnet50/configure/resnet50.patch
new file mode 100644
index 000000000..d5f946aad
--- /dev/null
+++ b/examples/train_resnet50/configure/resnet50.patch
@@ -0,0 +1,43 @@
+diff --git a/official/vision/image_classification/augment.py b/official/vision/image_classification/augment.py
+index f322d31da..294fcd543 100644
+--- a/official/vision/image_classification/augment.py
++++ b/official/vision/image_classification/augment.py
+@@ -25,7 +25,10 @@ from __future__ import print_function
+ import math
+ from typing import Any, Dict, List, Optional, Text, Tuple
+
+-from keras.layers.preprocessing import image_preprocessing as image_ops
++try:
++ from keras.src.layers.preprocessing import image_preprocessing as image_ops
++except ImportError:
++ from keras.layers.preprocessing import image_preprocessing as image_ops
+ import tensorflow as tf
+
+
+diff --git a/official/vision/image_classification/classifier_trainer.py b/official/vision/image_classification/classifier_trainer.py
+index ab6fbaea9..0347bc8c9 100644
+--- a/official/vision/image_classification/classifier_trainer.py
++++ b/official/vision/image_classification/classifier_trainer.py
+@@ -283,6 +283,14 @@ def serialize_config(params: base_configs.ExperimentConfig, model_dir: str):
+ hyperparams.save_params_dict_to_yaml(params, params_save_path)
+
+
++class dummy_context:
++ def __init__(self):
++ pass
++ def __enter__(self):
++ pass
++ def __exit__(self, exc_type, exc_value, traceback):
++ pass
++
+ def train_and_eval(
+ params: base_configs.ExperimentConfig,
+ strategy_override: tf.distribute.Strategy) -> Mapping[str, Any]:
+@@ -323,6 +331,7 @@ def train_and_eval(
+ initialize(params, train_builder)
+
+ logging.info('Global batch size: %d', train_builder.global_batch_size)
++ strategy_scope = dummy_context()
+
+ with strategy_scope:
+ model_params = params.model.model_params.as_dict()
diff --git a/examples/train_horovod/resnet50/hvd_support.patch b/examples/train_resnet50/hvd_configure/hvd_support.patch
similarity index 75%
rename from examples/train_horovod/resnet50/hvd_support.patch
rename to examples/train_resnet50/hvd_configure/hvd_support.patch
index a79c71129..860d2adfc 100644
--- a/examples/train_horovod/resnet50/hvd_support.patch
+++ b/examples/train_resnet50/hvd_configure/hvd_support.patch
@@ -1,17 +1,47 @@
+diff --git a/official/legacy/image_classification/callbacks.py b/official/legacy/image_classification/callbacks.py
+index a4934ed88..a7eaad9fb 100644
+--- a/official/legacy/image_classification/callbacks.py
++++ b/official/legacy/image_classification/callbacks.py
+@@ -149,8 +149,8 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):
+ def _calculate_metrics(self) -> MutableMapping[str, Any]:
+ logs = {}
+ # TODO(b/149030439): disable LR reporting.
+- # if self._track_lr:
+- # logs['learning_rate'] = self._calculate_lr()
++ if self._track_lr:
++ logs['learning_rate'] = self._calculate_lr()
+ return logs
+
+ def _calculate_lr(self) -> int:
+diff --git a/official/vision/image_classification/augment.py b/official/vision/image_classification/augment.py
+index f322d31da..e668ff731 100644
+--- a/official/vision/image_classification/augment.py
++++ b/official/vision/image_classification/augment.py
+@@ -25,7 +25,10 @@ from __future__ import print_function
+ import math
+ from typing import Any, Dict, List, Optional, Text, Tuple
+
+-from keras.layers.preprocessing import image_preprocessing as image_ops
++try:
++ from keras.src.layers.preprocessing import image_preprocessing as image_ops
++except ImportError:
++ from keras.layers.preprocessing import image_preprocessing as image_ops
+ import tensorflow as tf
+
+
diff --git a/official/vision/image_classification/callbacks.py b/official/vision/image_classification/callbacks.py
-index a4934ed88..2befe5e18 100644
+index a4934ed88..7eaaf0bea 100644
--- a/official/vision/image_classification/callbacks.py
+++ b/official/vision/image_classification/callbacks.py
-@@ -78,6 +78,8 @@ def get_callbacks(
+@@ -78,6 +78,7 @@ def get_callbacks(
save_weights_only=True,
verbose=1))
callbacks.append(MovingAverageCallback())
-+ callbacks.append(ThresholdStopping(
-+ monitor = "val_accuracy", threshold = 0.759))
++ #callbacks.append(ThresholdStopping(monitor = "val_accuracy", threshold = 0.759))
return callbacks
-@@ -254,3 +256,38 @@ class AverageModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
+@@ -254,3 +255,38 @@ class AverageModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
result = super()._save_model(epoch, logs) # pytype: disable=attribute-error # typed-keras
self.model.set_weights(non_avg_weights)
return result
@@ -51,12 +81,14 @@ index a4934ed88..2befe5e18 100644
+ return monitor_value
+
diff --git a/official/vision/image_classification/classifier_trainer.py b/official/vision/image_classification/classifier_trainer.py
-index ab6fbaea9..f0f2cb2c5 100644
+index ab6fbaea9..4655da449 100644
--- a/official/vision/image_classification/classifier_trainer.py
+++ b/official/vision/image_classification/classifier_trainer.py
-@@ -37,6 +37,14 @@ from official.vision.image_classification.efficientnet import efficientnet_model
+@@ -36,7 +36,16 @@ from official.vision.image_classification.configs import configs
+ from official.vision.image_classification.efficientnet import efficientnet_model
from official.vision.image_classification.resnet import common
from official.vision.image_classification.resnet import resnet_model
++import time
+global is_mpi
+try:
@@ -69,7 +101,16 @@ index ab6fbaea9..f0f2cb2c5 100644
def get_models() -> Mapping[str, tf.keras.Model]:
"""Returns the mapping from model type name to Keras model."""
-@@ -289,6 +297,12 @@ def train_and_eval(
+@@ -271,7 +280,7 @@ def define_classifier_flags():
+ help='The name of the dataset, e.g. ImageNet, etc.')
+ flags.DEFINE_integer(
+ 'log_steps',
+- default=100,
++ default=200,
+ help='The interval of steps between logging of batch level stats.')
+
+
+@@ -289,6 +298,12 @@ def train_and_eval(
"""Runs the train and eval path using compile/fit."""
logging.info('Running train and eval.')
@@ -82,7 +123,7 @@ index ab6fbaea9..f0f2cb2c5 100644
distribute_utils.configure_cluster(params.runtime.worker_hosts,
params.runtime.task_index)
-@@ -299,7 +313,7 @@ def train_and_eval(
+@@ -299,7 +314,7 @@ def train_and_eval(
num_gpus=params.runtime.num_gpus,
tpu_address=params.runtime.tpu)
@@ -91,7 +132,7 @@ index ab6fbaea9..f0f2cb2c5 100644
logging.info('Detected %d devices.',
strategy.num_replicas_in_sync if strategy else 1)
-@@ -324,56 +338,74 @@ def train_and_eval(
+@@ -324,56 +339,74 @@ def train_and_eval(
logging.info('Global batch size: %d', train_builder.global_batch_size)
@@ -169,14 +210,14 @@ index ab6fbaea9..f0f2cb2c5 100644
+ label_smoothing=params.model.loss.label_smoothing)
+ else:
+ loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
-
++
+ hvd_optimizer = hvd.DistributedOptimizer(optimizer, num_groups=1)
+ model.compile(
+ optimizer=hvd_optimizer,
+ loss=loss_obj,
+ metrics=metrics,
+ steps_per_execution=steps_per_loop)
-+
+
+ initial_epoch = 0
+ if params.train.resume_checkpoint:
+ initial_epoch = resume_from_checkpoint(
@@ -214,6 +255,29 @@ index ab6fbaea9..f0f2cb2c5 100644
serialize_config(params=params, model_dir=params.model_dir)
if params.evaluation.skip_eval:
+@@ -384,7 +417,9 @@ def train_and_eval(
+ 'validation_steps': validation_steps,
+ 'validation_freq': params.evaluation.epochs_between_evals,
+ }
+-
++ print('[info] Training steps = ', train_steps)
++ print('[info] Validation steps = ', validation_steps)
++ global_start_time = time.time()
+ history = model.fit(
+ train_dataset,
+ epochs=train_epochs,
+@@ -394,6 +429,11 @@ def train_and_eval(
+ verbose=2,
+ **validation_kwargs)
+
++ global_end_time = time.time()
++ print('[info] Global start time = ', time.asctime(time.localtime(global_start_time)))
++ print('[info] Global end time = ', time.asctime(time.localtime(global_end_time)))
++ print('[info] Global consume time = ', ((global_end_time - global_start_time) / (60.0)), ' mins')
++
+ validation_output = None
+ if not params.evaluation.skip_eval:
+ validation_output = model.evaluate(
diff --git a/official/vision/image_classification/configs/base_configs.py b/official/vision/image_classification/configs/base_configs.py
index 760b3dce0..3939249b6 100644
--- a/official/vision/image_classification/configs/base_configs.py
@@ -227,7 +291,7 @@ index 760b3dce0..3939249b6 100644
@dataclasses.dataclass
diff --git a/official/vision/image_classification/dataset_factory.py b/official/vision/image_classification/dataset_factory.py
-index a0458eccc..9ff76333b 100644
+index a0458eccc..275c6d5fb 100644
--- a/official/vision/image_classification/dataset_factory.py
+++ b/official/vision/image_classification/dataset_factory.py
@@ -29,6 +29,7 @@ import tensorflow_datasets as tfds
@@ -238,16 +302,25 @@ index a0458eccc..9ff76333b 100644
AUGMENTERS = {
'autoaugment': augment.AutoAugment,
-@@ -207,7 +208,7 @@ class DatasetBuilder:
+@@ -207,7 +208,16 @@ class DatasetBuilder:
def num_steps(self) -> int:
"""The number of steps (batches) to exhaust this dataset."""
# Always divide by the global batch size to get the correct # of steps
- return self.num_examples // self.global_batch_size
-+ return self.num_examples // (self.global_batch_size * hvd.size())
++ distributed_size = 1
++ if self.config.split == 'train':
++ distributed_size = hvd.size()
++ divide_steps = self.num_examples // (self.global_batch_size * distributed_size)
++ remain_steps = self.num_examples % (self.global_batch_size * distributed_size)
++ if remain_steps == 0:
++ return divide_steps
++ else:
++ return divide_steps + 1
++ #return self.num_examples // (self.global_batch_size * hvd.size())
@property
def dtype(self) -> tf.dtypes.DType:
-@@ -403,14 +404,9 @@ class DatasetBuilder:
+@@ -403,14 +413,10 @@ class DatasetBuilder:
Returns:
A TensorFlow dataset outputting batched images and labels.
"""
@@ -255,22 +328,35 @@ index a0458eccc..9ff76333b 100644
- self.input_context.num_input_pipelines > 1):
- dataset = dataset.shard(self.input_context.num_input_pipelines,
- self.input_context.input_pipeline_id)
-- logging.info(
++ if self.is_training:
++ dataset = dataset.shard(hvd.size(), hvd.rank())
+ logging.info(
- 'Sharding the dataset: input_pipeline_id=%d '
- 'num_input_pipelines=%d', self.input_context.num_input_pipelines,
- self.input_context.input_pipeline_id)
-+ if self.is_training:
-+ dataset = dataset.shard(hvd.size(), hvd.rank())
-+ print("Sharding the dataset: total size: ", hvd.size(), " local rank: ", hvd.rank())
++ 'Sharding the dataset: total size: %d, local rank: %d', hvd.size(), hvd.rank())
if self.is_training and self.config.builder == 'records':
# Shuffle the input files.
+@@ -455,10 +461,10 @@ class DatasetBuilder:
+ # replicas automatically when strategy.distribute_datasets_from_function
+ # is called, so we use local batch size here.
+ dataset = dataset.batch(
+- self.local_batch_size, drop_remainder=self.is_training)
++ self.local_batch_size, drop_remainder=False)
+ else:
+ dataset = dataset.batch(
+- self.global_batch_size, drop_remainder=self.is_training)
++ self.global_batch_size, drop_remainder=False)
+
+ # Prefetch overlaps in-feed with training
+ dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
diff --git a/official/vision/image_classification/lars_optimizer.py b/official/vision/image_classification/lars_optimizer.py
new file mode 100644
-index 000000000..e54a9738c
+index 000000000..029ae654e
--- /dev/null
+++ b/official/vision/image_classification/lars_optimizer.py
-@@ -0,0 +1,239 @@
+@@ -0,0 +1,248 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
@@ -304,13 +390,21 @@ index 000000000..e54a9738c
+# from tensorflow.python.keras import backend_config
+# from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+tf_minor = int(tf.__version__.split('.')[1])
-+if tf_minor >= 7:
-+ if tf_minor >= 12:
-+ from keras.optimizers.legacy import optimizer_v2
-+ elif tf_minor >= 9:
-+ from keras.optimizers.optimizer_v2 import optimizer_v2
-+ else:
-+ from keras.optimizer_v2 import optimizer_v2
++if tf_minor >=13:
++ from keras.src.optimizers.legacy import optimizer_v2
++ from keras.src import backend_config
++elif tf_minor >= 12:
++ from keras.optimizers.legacy import optimizer_v2
++ from keras import backend_config
++elif tf_minor >= 9:
++ from keras.optimizers.optimizer_v2 import optimizer_v2
++ from keras import backend_config
++elif tf_minor >= 6:
++ from keras.optimizer_v2 import optimizer_v2
++ from keras import backend_config
++else:
++ from tensorflow.python.keras.optimizer_v2 import optimizer_v2
++ from tensorflow.python.keras import backend_config
+
+# class LARSOptimizer(optimizer_v2modified.OptimizerV2Modified):
+class LARSOptimizer(optimizer_v2.OptimizerV2):
@@ -508,10 +602,11 @@ index 000000000..e54a9738c
+ "eeta": self.eeta,
+ "epsilon": self.epsilon,
+ "use_nesterov": self.use_nesterov,
++ "skip_list": self._skip_list,
+ })
+ return config
diff --git a/official/vision/image_classification/learning_rate.py b/official/vision/image_classification/learning_rate.py
-index 72f7e9518..bb5621f04 100644
+index 72f7e9518..19207291d 100644
--- a/official/vision/image_classification/learning_rate.py
+++ b/official/vision/image_classification/learning_rate.py
@@ -22,10 +22,14 @@ from typing import Any, Mapping, Optional
@@ -621,9 +716,9 @@ index 72f7e9518..bb5621f04 100644
+
+ def _get_learning_rate(self, step):
+ with ops.name_scope_v2(self.name or 'PolynomialDecayWithWarmup') as name:
-+ initial_learning_rate = ops.convert_to_tensor_v2(
++ initial_learning_rate = ops.convert_to_tensor(
+ self.initial_learning_rate, name='initial_learning_rate')
-+ warmup_steps = ops.convert_to_tensor_v2(
++ warmup_steps = ops.convert_to_tensor(
+ self.warmup_steps, name='warmup_steps')
+ step = tf.cast(step, tf.float32)
+ warmup_rate = (
@@ -648,7 +743,7 @@ index 72f7e9518..bb5621f04 100644
+ }
+
diff --git a/official/vision/image_classification/optimizer_factory.py b/official/vision/image_classification/optimizer_factory.py
-index 48a4512ee..5f98744aa 100644
+index 48a4512ee..0ed4cb914 100644
--- a/official/vision/image_classification/optimizer_factory.py
+++ b/official/vision/image_classification/optimizer_factory.py
@@ -26,6 +26,7 @@ import tensorflow_addons as tfa
@@ -659,6 +754,39 @@ index 48a4512ee..5f98744aa 100644
# pylint: disable=protected-access
+@@ -61,12 +62,12 @@ def build_optimizer(
+ if optimizer_name == 'sgd':
+ logging.info('Using SGD optimizer')
+ nesterov = params.get('nesterov', False)
+- optimizer = tf.keras.optimizers.SGD(
++ optimizer = tf.keras.optimizers.legacy.SGD(
+ learning_rate=base_learning_rate, nesterov=nesterov)
+ elif optimizer_name == 'momentum':
+ logging.info('Using momentum optimizer')
+ nesterov = params.get('nesterov', False)
+- optimizer = tf.keras.optimizers.SGD(
++ optimizer = tf.keras.optimizers.legacy.SGD(
+ learning_rate=base_learning_rate,
+ momentum=params['momentum'],
+ nesterov=nesterov)
+@@ -75,7 +76,7 @@ def build_optimizer(
+ rho = params.get('decay', None) or params.get('rho', 0.9)
+ momentum = params.get('momentum', 0.9)
+ epsilon = params.get('epsilon', 1e-07)
+- optimizer = tf.keras.optimizers.RMSprop(
++ optimizer = tf.keras.optimizers.legacy.RMSprop(
+ learning_rate=base_learning_rate,
+ rho=rho,
+ momentum=momentum,
+@@ -85,7 +86,7 @@ def build_optimizer(
+ beta_1 = params.get('beta_1', 0.9)
+ beta_2 = params.get('beta_2', 0.999)
+ epsilon = params.get('epsilon', 1e-07)
+- optimizer = tf.keras.optimizers.Adam(
++ optimizer = tf.keras.optimizers.legacy.Adam(
+ learning_rate=base_learning_rate,
+ beta_1=beta_1,
+ beta_2=beta_2,
@@ -102,6 +103,16 @@ def build_optimizer(
beta_1=beta_1,
beta_2=beta_2,
@@ -676,6 +804,15 @@ index 48a4512ee..5f98744aa 100644
else:
raise ValueError('Unknown optimizer %s' % optimizer_name)
+@@ -139,7 +150,7 @@ def build_learning_rate(params: base_configs.LearningRateConfig,
+ else:
+ warmup_steps = 0
+
+- lr_multiplier = params.scale_by_batch_size
++ lr_multiplier = 0 #params.scale_by_batch_size
+
+ if lr_multiplier and lr_multiplier > 0:
+ # Scale the learning rate based on the batch size and a multiplier
@@ -172,6 +183,14 @@ def build_learning_rate(params: base_configs.LearningRateConfig,
batch_size=batch_size,
total_steps=train_epochs * train_steps,
@@ -691,3 +828,38 @@ index 48a4512ee..5f98744aa 100644
if warmup_steps > 0:
if decay_type not in ['cosine_with_warmup']:
logging.info('Applying %d warmup steps to the learning rate',
+diff --git a/official/vision/image_classification/resnet/imagenet_preprocessing.py b/official/vision/image_classification/resnet/imagenet_preprocessing.py
+index 86ba3ed98..298a4a412 100644
+--- a/official/vision/image_classification/resnet/imagenet_preprocessing.py
++++ b/official/vision/image_classification/resnet/imagenet_preprocessing.py
+@@ -113,7 +113,7 @@ def process_record_dataset(dataset,
+ dataset = dataset.map(
+ lambda value: parse_record_fn(value, is_training, dtype),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+- dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
++ dataset = dataset.batch(batch_size, drop_remainder=False)
+
+ # Operations between the final prefetch and the get_next call to the iterator
+ # will happen synchronously during run time. We prefetch here again to
+@@ -350,7 +350,7 @@ def input_fn(is_training,
+ parse_record_fn=parse_record_fn,
+ dtype=dtype,
+ datasets_num_private_threads=datasets_num_private_threads,
+- drop_remainder=drop_remainder,
++ drop_remainder=False,
+ tf_data_experimental_slack=tf_data_experimental_slack,
+ )
+
+diff --git a/official/vision/image_classification/resnet/resnet_runnable.py b/official/vision/image_classification/resnet/resnet_runnable.py
+index fe3059f77..c521e5992 100644
+--- a/official/vision/image_classification/resnet/resnet_runnable.py
++++ b/official/vision/image_classification/resnet/resnet_runnable.py
+@@ -100,7 +100,7 @@ class ResnetRunnable(orbit.StandardTrainer, orbit.StandardEvaluator):
+ datasets_num_private_threads=self.flags_obj
+ .datasets_num_private_threads,
+ dtype=self.dtype,
+- drop_remainder=True)
++ drop_remainder=False)
+ orbit.StandardTrainer.__init__(
+ self,
+ train_dataset,
diff --git a/examples/train_resnet50/hvd_configure/itex_bf16_lars.yaml b/examples/train_resnet50/hvd_configure/itex_bf16_lars.yaml
new file mode 100644
index 000000000..4930b7d42
--- /dev/null
+++ b/examples/train_resnet50/hvd_configure/itex_bf16_lars.yaml
@@ -0,0 +1,55 @@
+evaluation:
+ epochs_between_evals: 1
+model:
+ learning_rate:
+ initial_lr: 10.8
+ name: polynomial
+ warmup_epochs: 5
+ loss:
+ label_smoothing: 0.1
+ model_params:
+ rescale_inputs: true
+ name: resnet
+ optimizer:
+ epsilon: 0
+ lookahead: false
+ momentum: 0.9
+ name: lars
+ weight_decay: 1.25e-05
+model_dir: models
+runtime:
+ distribution_strategy: mirrored
+ num_gpus: 1
+train:
+ callbacks:
+ enable_checkpoint_and_export: true
+ epochs: 42
+ resume_checkpoint: true
+train_dataset:
+ batch_size: 512
+ builder: records
+ data_dir: None
+ dtype: bfloat16
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 1281167
+ one_hot: false
+ split: train
+ standardize: false
+ use_per_replica_batch_size: true
+validation_dataset:
+ batch_size: 512
+ builder: records
+ data_dir: None
+ dtype: bfloat16
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 50000
+ one_hot: false
+ split: validation
+ standardize: false
+ use_per_replica_batch_size: true
diff --git a/examples/train_resnet50/hvd_configure/itex_dummy_bf16_lars.yaml b/examples/train_resnet50/hvd_configure/itex_dummy_bf16_lars.yaml
new file mode 100644
index 000000000..95fd678d9
--- /dev/null
+++ b/examples/train_resnet50/hvd_configure/itex_dummy_bf16_lars.yaml
@@ -0,0 +1,55 @@
+evaluation:
+ epochs_between_evals: 1
+model:
+ learning_rate:
+ initial_lr: 10.8
+ name: polynomial
+ warmup_epochs: 5
+ loss:
+ label_smoothing: 0.1
+ model_params:
+ rescale_inputs: true
+ name: resnet
+ optimizer:
+ epsilon: 0
+ lookahead: false
+ momentum: 0.9
+ name: lars
+ weight_decay: 1.25e-05
+model_dir: models
+runtime:
+ distribution_strategy: mirrored
+ num_gpus: 1
+train:
+ callbacks:
+ enable_checkpoint_and_export: true
+ epochs: 1
+ resume_checkpoint: true
+train_dataset:
+ batch_size: 512
+ builder: synthetic
+ data_dir: None
+ dtype: bfloat16
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 1281167
+ one_hot: false
+ split: train
+ standardize: false
+ use_per_replica_batch_size: true
+validation_dataset:
+ batch_size: 512
+ builder: synthetic
+ data_dir: None
+ dtype: bfloat16
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 50000
+ one_hot: false
+ split: validation
+ standardize: false
+ use_per_replica_batch_size: true
diff --git a/examples/train_resnet50/hvd_configure/itex_dummy_fp32_lars.yaml b/examples/train_resnet50/hvd_configure/itex_dummy_fp32_lars.yaml
new file mode 100644
index 000000000..7577d12c6
--- /dev/null
+++ b/examples/train_resnet50/hvd_configure/itex_dummy_fp32_lars.yaml
@@ -0,0 +1,55 @@
+evaluation:
+ epochs_between_evals: 2
+model:
+ learning_rate:
+ initial_lr: 9.0
+ name: polynomial
+ warmup_epochs: 3
+ loss:
+ label_smoothing: 0.1
+ model_params:
+ rescale_inputs: true
+ name: resnet
+ optimizer:
+ epsilon: 0
+ lookahead: false
+ momentum: 0.9
+ name: lars
+ weight_decay: 1.25e-05
+model_dir: models
+runtime:
+ distribution_strategy: mirrored
+ num_gpus: 1
+train:
+ callbacks:
+ enable_checkpoint_and_export: true
+ epochs: 1
+ resume_checkpoint: true
+train_dataset:
+ batch_size: 256
+ builder: synthetic
+ data_dir: None
+ dtype: float32
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 1281167
+ one_hot: false
+ split: train
+ standardize: false
+ use_per_replica_batch_size: true
+validation_dataset:
+ batch_size: 256
+ builder: synthetic
+ data_dir: None
+ dtype: float32
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 50000
+ one_hot: false
+ split: validation
+ standardize: false
+ use_per_replica_batch_size: true
diff --git a/examples/train_resnet50/hvd_configure/itex_fp32_lars.yaml b/examples/train_resnet50/hvd_configure/itex_fp32_lars.yaml
new file mode 100644
index 000000000..18ed07b16
--- /dev/null
+++ b/examples/train_resnet50/hvd_configure/itex_fp32_lars.yaml
@@ -0,0 +1,55 @@
+evaluation:
+ epochs_between_evals: 2
+model:
+ learning_rate:
+ initial_lr: 9.0
+ name: polynomial
+ warmup_epochs: 3
+ loss:
+ label_smoothing: 0.1
+ model_params:
+ rescale_inputs: true
+ name: resnet
+ optimizer:
+ epsilon: 0
+ lookahead: false
+ momentum: 0.9
+ name: lars
+ weight_decay: 1.25e-05
+model_dir: models
+runtime:
+ distribution_strategy: mirrored
+ num_gpus: 1
+train:
+ callbacks:
+ enable_checkpoint_and_export: true
+ epochs: 39
+ resume_checkpoint: true
+train_dataset:
+ batch_size: 256
+ builder: records
+ data_dir: None
+ dtype: float32
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 1281167
+ one_hot: false
+ split: train
+ standardize: false
+ use_per_replica_batch_size: true
+validation_dataset:
+ batch_size: 512
+ builder: records
+ data_dir: None
+ dtype: float32
+ image_size: 224
+ mean_subtract: false
+ name: imagenet2012
+ num_classes: 1000
+ num_examples: 50000
+ one_hot: false
+ split: validation
+ standardize: false
+ use_per_replica_batch_size: true
diff --git a/examples/train_resnet50/pip_set_env.sh b/examples/train_resnet50/pip_set_env.sh
new file mode 100755
index 000000000..221307782
--- /dev/null
+++ b/examples/train_resnet50/pip_set_env.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#
+# Copyright (c) 2021-2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ENV_NAME=env_itex
+deactivate 2>/dev/null  # ignore the error if no virtual environment is active
+rm -rf $ENV_NAME
+python -m venv $ENV_NAME
+source $ENV_NAME/bin/activate
+pip install --upgrade pip
+pip install scikit-image
+pip install --upgrade intel-extension-for-tensorflow[xpu]
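+
+# To reuse this environment in a later shell (hypothetical follow-up, not part of the setup itself):
+#   source env_itex/bin/activate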