memory management: swap+pool #412
base: master
Changes from 17 commits
File: cifar10 training example (train.py)

@@ -31,24 +31,25 @@
 import os

[Review comment] pls keep the cnn.py (instead of alexnet.py)

[Review comment] I think you don't need to change the example model code.

 import argparse

 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import utils
 from singa import optimizer
 from singa import device
 from singa import tensor
+from singa.proto import core_pb2
 from caffe import caffe_net

-import cnn
+import alexnet
 import vgg
 import resnet

+from datetime import datetime
+import time
+
 def load_dataset(filepath):
     print('Loading data file %s' % filepath)
     with open(filepath, 'rb') as fd:
-        try:
-            cifar10 = pickle.load(fd, encoding='latin1')
-        except TypeError:
-            cifar10 = pickle.load(fd)
+        cifar10 = pickle.load(fd)
     image = cifar10['data'].astype(dtype=np.uint8)
     image = image.reshape((-1, 3, 32, 32))
     label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -129,24 +130,35 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.get_default_device()
     else:
         print('Using GPU')
-        dev = device.create_cuda_gpu()
+        dev = device.create_cuda_gpu_on(0)

     net.to_device(dev)
     opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
     for (p, specs) in zip(net.param_names(), net.param_specs()):
         opt.register(p, specs)

     tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
-    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
+    ty = tensor.Tensor((batch_size,), dev, core_pb2.kInt)
     train_x, train_y, test_x, test_y = data
     num_train_batch = train_x.shape[0] // batch_size
     num_test_batch = test_x.shape[0] // batch_size
     idx = np.arange(train_x.shape[0], dtype=np.int32)
-    for epoch in range(max_epoch):
+    fileTimeLog =open("epochTimeLog.text","a")
+    for epoch in range(1):
         np.random.shuffle(idx)
         loss, acc = 0.0, 0.0
         print('Epoch %d' % epoch)
-        for b in range(num_train_batch):
+        print(datetime.now().timetz())  # miliseconds
+        print(int(round(time.time()*1000)))
+        fileTimeLog.write('Epoch %d: ' % epoch)
+        fileTimeLog.write(str(int(round(time.time()*1000))))
+        fileTimeLog.write('\n')
+        for b in range(20):  #num_train_batch):
+            print ("start of iteration %d: " %b)
+            #time.sleep(1)
+            fileTimeLog.write('iteration %d: ' % b)
+            fileTimeLog.write(str(int(round(time.time()*1000))))
+            fileTimeLog.write('\n')
             x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
             y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
             tx.copy_from_numpy(x)
@@ -164,7 +176,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
             print(info)

         loss, acc = 0.0, 0.0
-        for b in range(num_test_batch):
+        for b in range(0):
             x = test_x[b * batch_size: (b + 1) * batch_size]
             y = test_y[b * batch_size: (b + 1) * batch_size]
             tx.copy_from_numpy(x)
@@ -175,14 +187,16 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,

         print('test loss = %f, test accuracy = %f' %
               ((loss / num_test_batch), (acc / num_test_batch)))
+    fileTimeLog.close()
     net.save('model', 20)  # save model params into checkpoint file

 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Train dcnn for cifar10')
-    parser.add_argument('model', choices=['vgg', 'cnn', 'resnet', 'caffe'],
-                        default='vgg')
+    parser.add_argument('model', choices=['vgg', 'alexnet', 'resnet', 'caffe'],
+                        default='alexnet')
     parser.add_argument('data', default='cifar-10-batches-py')
     parser.add_argument('--use_cpu', action='store_true')
+    parser.add_argument('batch_size',type=int, default=100)
     args = parser.parse_args()
     assert os.path.exists(args.data), \
         'Pls download the cifar10 dataset via "download_data.py py"'
@@ -194,22 +208,22 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         net = caffe_net.create_net(args.use_cpu)
         # for cifar10_full_train_test.prototxt
         train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
-              use_cpu=args.use_cpu)
+              use_cpu=args.use_cpu,batch_size=args.batch_size)
         # for cifar10_quick_train_test.prototxt
         # train((train_x, train_y, test_x, test_y), net, 18, caffe_lr, 0.004,
         #       use_cpu=args.use_cpu)
-    elif args.model == 'cnn':
+    elif args.model == 'alexnet':
         train_x, test_x = normalize_for_alexnet(train_x, test_x)
-        net = cnn.create_net(args.use_cpu)
+        net = alexnet.create_net(args.use_cpu)
         train((train_x, train_y, test_x, test_y), net, 2, alexnet_lr, 0.004,
-              use_cpu=args.use_cpu)
+              use_cpu=args.use_cpu,batch_size=args.batch_size)
     elif args.model == 'vgg':
         train_x, test_x = normalize_for_vgg(train_x, test_x)
         net = vgg.create_net(args.use_cpu)
         train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005,
-              use_cpu=args.use_cpu)
+              use_cpu=args.use_cpu,batch_size=args.batch_size)
     else:

[Review comment] again, it would be better to keep the original example code.

         train_x, test_x = normalize_for_alexnet(train_x, test_x)
         net = resnet.create_net(args.use_cpu)
         train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4,
-              use_cpu=args.use_cpu)
+              use_cpu=args.use_cpu,batch_size=args.batch_size)
File: singa/core/common.h

@@ -24,7 +24,7 @@
 #include <atomic>
 #include <memory>
 #include "singa/utils/logging.h"
-
+#include <string>

[Review comment] not used?

 #ifdef USE_CUDA
 #include <cuda_runtime.h>
 #include <cublas_v2.h>
@@ -52,24 +52,25 @@ typedef struct _Cuda { } Cuda;
 typedef struct _Opencl { } Opencl;
 }  // namespace lang

+class Device;
 /// Block represent a chunk of memory (on device or host).
 class Block {
  public:
-  Block(void* ptr, size_t size, size_t offset = 0)
-      : data_(ptr), size_(size), offset_(offset) {
+  Block(void* ptr, size_t size, size_t offset = 0, Device* ptrDevice = nullptr)
+      : data_(ptr), size_(size), offset_(offset), ptrDevice_(ptrDevice) {

[Review comment] ptr_device_

     ref_count_ = 1;  // std::make_shared<std::atomic<int>>(1);
   }
   // Disabled as it is not used currently.
   // Block(void* ptr, size_t size, size_t offset, std::shared_ptr<atomic<int>>
   // ref) : data_(ptr), size_(size), offset_(offset), ref_count_(ref) {}
-  void* mutable_data() {
-    initialized_ = true;
-    return static_cast<char*>(data_) + offset_;
-  }
-  const void* data() const {
-    CHECK(initialized_) << "Must initialize data before reading it";
-    return static_cast<char*>(data_) + offset_;
-  }
+  void* mutable_data();
+
+  const void* data() const;
+
+  void* get_data();
+
+  void update_data(void* data_new);
   size_t size() const { return size_; }
   size_t offset() const { return offset_; }
   int IncRefCount() {

@@ -89,6 +90,7 @@ class Block {
   void* data_ = nullptr;
   size_t size_ = 0;
   size_t offset_ = 0;
+  Device* ptrDevice_;
   bool initialized_ = false;
   // Disabled as it is not used currently.
   // std::shared_ptr<std::atomic<int>> ref_count_ = nullptr;
@@ -114,4 +116,4 @@ typedef struct _Context {
 } Context;

 }  // namespace singa
-#endif  // SINGA_CORE_COMMON_H_
+#endif  // SINGA_CORE_COMMON_H_
\ No newline at end of file
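
Note on the Block changes above: the inline data()/mutable_data() bodies become out-of-line declarations, Block gains a back-pointer to its owning Device, and get_data()/update_data() expose the raw pointer so the memory manager can repoint a block after moving its buffer. The definitions live in a .cc file that is not part of this hunk; what follows is only a rough sketch of how such definitions could route every access through the device. The Device hooks (AppendReadInfo, AppendWriteInfo) are invented names for illustration; the PR's actual swap-scheduler interface may differ.

// Hypothetical sketch of the out-of-line definitions, e.g. in common.cc.
// Not the PR's actual code; assumes device.h declares the invented hooks.
#include "singa/core/common.h"
#include "singa/core/device.h"

namespace singa {

void* Block::mutable_data() {
  initialized_ = true;
  if (ptrDevice_ != nullptr)
    // Record the write; the scheduler may swap the buffer back in first.
    ptrDevice_->AppendWriteInfo(this);
  return static_cast<char*>(data_) + offset_;
}

const void* Block::data() const {
  CHECK(initialized_) << "Must initialize data before reading it";
  if (ptrDevice_ != nullptr)
    // const_cast because swapping in mutates data_ even on a
    // logically read-only access.
    ptrDevice_->AppendReadInfo(const_cast<Block*>(this));
  return static_cast<char*>(data_) + offset_;
}

void* Block::get_data() { return data_; }  // raw pointer, bypasses tracking

void Block::update_data(void* data_new) { data_ = data_new; }  // repoint after a swap

}  // namespace singa

The point of the indirection is that every read and write of a Block becomes observable, which is what lets a swap scheduler learn the iteration's access pattern and evict or prefetch buffers between GPU and host memory.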
[Review comment] did you change cnmem source code?
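
For context on that question: cnmem is NVIDIA's open-source CUDA memory-pool library (github.com/NVIDIA/cnmem), which the pool side of this PR builds on. A minimal standalone sketch of its public API, independent of SINGA; the device id and pool size are arbitrary example values.

// Standalone cnmem usage sketch; illustrates the pool API only.
#include <cnmem.h>
#include <cstdio>
#include <cstring>

int main() {
  // Describe one GPU and reserve a 256 MB pool on it up front.
  cnmemDevice_t dev;
  std::memset(&dev, 0, sizeof(dev));
  dev.device = 0;                // CUDA device id
  dev.size = 256 * 1024 * 1024;  // bytes reserved for the pool

  if (cnmemInit(1, &dev, CNMEM_FLAGS_DEFAULT) != CNMEM_STATUS_SUCCESS) {
    std::fprintf(stderr, "cnmemInit failed\n");
    return 1;
  }

  // Sub-allocations are served from the up-front reservation, so
  // steady-state training avoids repeated cudaMalloc/cudaFree calls.
  void* buf = nullptr;
  cnmemMalloc(&buf, 1 << 20, NULL);  // 1 MB from the pool (default stream)
  // ... launch kernels that use buf ...
  cnmemFree(buf, NULL);

  cnmemFinalize();
  return 0;
}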