Merge pull request #4 from demarley/hepPlotter-update

HEP plotter update
demarley · Oct 24, 2018 · 40d454b
2 parents cc722d4 + c7cfa75
commit 40d454b
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ Clone the repository and the hepPlotter repository (Asimov uses hepPlotter to ma
 git clone https://github.com/demarley/asimov.git
 git clone https://github.com/demarley/hepPlotter.git
 cd hepPlotter/
-git checkout tags/v0.3    # current compatibility
+git checkout tags/v0.4.2    # current compatibility
 ```
 
 Please see the `examples/` directory for an example on using this framework with data from the Higgs Boson Machine Learning Challenge.

diff --git a/examples/0-simple.ipynb b/examples/0-simple.ipynb
@@ -23,9 +23,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
@@ -44,9 +42,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cwd = os.getcwd()\n",
@@ -63,22 +59,12 @@
    "execution_count": 3,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/Users/demarley/Desktop/CERN/CMS/common/asimov/examples /Users/demarley/Desktop/CERN/CMS/common/hepPlotter/python//examples\n",
-      "Welcome to JupyROOT 6.10/02\n"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "/Users/demarley/anaconda2/lib/python2.7/site-packages/matplotlib/style/core.py:51: UserWarning: Style includes a parameter, 'backend', that is not related to style.  Ignoring\n",
       "  \"to style.  Ignoring\".format(key))\n",
-      "/Users/demarley/root_build/lib/ROOT.py:318: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
-      "  return _orig_ihook( name, *args, **kwds )\n",
       "Using TensorFlow backend.\n"
      ]
     }
@@ -118,7 +104,7 @@
       "WARNING :: CONFIG : Using default value.\n",
       "WARNING :: CONFIG : The configuration file does not contain dnn_data\n",
       "WARNING :: CONFIG : Using default value.\n",
-      " INFO :: RUN :  Saving output to ./example/training-19Sep2018-1402/\n"
+      " INFO :: RUN :  Saving output to ./example/training-23Oct2018-2325/\n"
      ]
     }
    ],
@@ -150,9 +136,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "dnn = Training() # class to do the training"
@@ -161,14 +145,13 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "dnn.variable_labels = plb.variable_labels()  # labels for variables\n",
     "dnn.sample_labels   = plb.sample_labels()    # labels for samples\n",
     "\n",
+    "dnn.backend    = 'uproot'         # Set the backend for hepPlotter\n",
     "dnn.hep_data   = config.hep_data\n",
     "dnn.model_name = config.dnn_data\n",
     "dnn.msg_svc    = vb\n",
@@ -221,13 +204,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/demarley/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:1328: UserWarning: findfont: Font family [u'sans-serif'] not found. Falling back to DejaVu Sans\n",
+      "/Users/demarley/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:1331: UserWarning: findfont: Font family [u'sans-serif'] not found. Falling back to DejaVu Sans\n",
       "  (prop.get_family(), self.defaultFamily[fontext]))\n",
-      "/Users/demarley/Desktop/CERN/CMS/common/hepPlotter/python/histogram1D.py:236: RuntimeWarning: divide by zero encountered in divide\n",
+      "/Users/demarley/Desktop/Home/hepex/hepPlotter/python/histogram1D.py:236: RuntimeWarning: divide by zero encountered in divide\n",
       "  ratio_data.data.content = (num_data / np.sqrt(den_data)).copy()\n",
-      "/Users/demarley/Desktop/CERN/CMS/common/hepPlotter/python/histogram1D.py:236: RuntimeWarning: invalid value encountered in divide\n",
+      "/Users/demarley/Desktop/Home/hepex/hepPlotter/python/histogram1D.py:236: RuntimeWarning: invalid value encountered in divide\n",
       "  ratio_data.data.content = (num_data / np.sqrt(den_data)).copy()\n",
-      "/Users/demarley/Desktop/CERN/CMS/common/asimov/python/util.py:112: RuntimeWarning: invalid value encountered in divide\n",
+      "/Users/demarley/Desktop/Home/hepex/asimov/python/util.py:112: RuntimeWarning: invalid value encountered in divide\n",
+      "  tmp = np.divide( (sig-bkg)**2 , (sig+bkg), dtype=np.float32)\n",
+      "/Users/demarley/Desktop/Home/hepex/asimov/python/util.py:112: RuntimeWarning: divide by zero encountered in divide\n",
       "  tmp = np.divide( (sig-bkg)**2 , (sig+bkg), dtype=np.float32)\n"
      ]
     },
@@ -239,39 +224,29 @@
       " INFO :: FOUNDATION : -- pre-training :: separations\n",
       "Train on 293538 samples, validate on 97846 samples\n",
       "Epoch 1/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.5018 - acc: 0.7537 - val_loss: 0.4736 - val_acc: 0.7731\n",
+      "293538/293538 [==============================] - 30s 101us/step - loss: 0.5009 - acc: 0.7547 - val_loss: 0.4798 - val_acc: 0.7717\n",
       "Epoch 2/10\n",
-      "293538/293538 [==============================] - 8s - loss: 0.4621 - acc: 0.7815 - val_loss: 0.4504 - val_acc: 0.7884\n",
+      "293538/293538 [==============================] - 26s 89us/step - loss: 0.4588 - acc: 0.7850 - val_loss: 0.4451 - val_acc: 0.7923\n",
       "Epoch 3/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4398 - acc: 0.7958 - val_loss: 0.4311 - val_acc: 0.8016\n",
+      "293538/293538 [==============================] - 26s 89us/step - loss: 0.4381 - acc: 0.7971 - val_loss: 0.4308 - val_acc: 0.8026\n",
       "Epoch 4/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4300 - acc: 0.8011 - val_loss: 0.4214 - val_acc: 0.8069\n",
+      "293538/293538 [==============================] - 26s 89us/step - loss: 0.4306 - acc: 0.8014 - val_loss: 0.4204 - val_acc: 0.8071\n",
       "Epoch 5/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4260 - acc: 0.8027 - val_loss: 0.4300 - val_acc: 0.7984\n",
+      "293538/293538 [==============================] - 26s 89us/step - loss: 0.4267 - acc: 0.8025 - val_loss: 0.4363 - val_acc: 0.7960\n",
       "Epoch 6/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4226 - acc: 0.8044 - val_loss: 0.4203 - val_acc: 0.8045\n",
+      "293538/293538 [==============================] - 26s 89us/step - loss: 0.4230 - acc: 0.8042 - val_loss: 0.4269 - val_acc: 0.8032\n",
       "Epoch 7/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4208 - acc: 0.8055 - val_loss: 0.4189 - val_acc: 0.8071\n",
+      "293538/293538 [==============================] - 27s 92us/step - loss: 0.4213 - acc: 0.8053 - val_loss: 0.4222 - val_acc: 0.8030\n",
       "Epoch 8/10\n",
-      "293538/293538 [==============================] - 11s - loss: 0.4189 - acc: 0.8062 - val_loss: 0.4131 - val_acc: 0.8096\n",
+      "293538/293538 [==============================] - 26s 90us/step - loss: 0.4193 - acc: 0.8058 - val_loss: 0.4194 - val_acc: 0.8071\n",
       "Epoch 9/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4172 - acc: 0.8075 - val_loss: 0.4177 - val_acc: 0.8074\n",
+      "293538/293538 [==============================] - 25s 86us/step - loss: 0.4181 - acc: 0.8070 - val_loss: 0.4223 - val_acc: 0.8056\n",
       "Epoch 10/10\n",
-      "293538/293538 [==============================] - 9s - loss: 0.4154 - acc: 0.8084 - val_loss: 0.4238 - val_acc: 0.8050\n",
-      " INFO :: DL : Plot the train/test predictions\n",
+      "293538/293538 [==============================] - 26s 87us/step - loss: 0.4160 - acc: 0.8078 - val_loss: 0.4160 - val_acc: 0.8092\n",
+      " INFO :: TRAINING : Plot the train/test predictions\n",
       " INFO :: FOUNDATION : -- post-training :: ROC\n",
       " INFO :: FOUNDATION : -- post-training :: History\n"
      ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2018-09-19 14:04:16.511819: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.\n",
-      "2018-09-19 14:04:16.511837: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.\n",
-      "2018-09-19 14:04:16.511842: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.\n",
-      "2018-09-19 14:04:16.511848: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.\n"
-     ]
     }
    ],
    "source": [
@@ -297,9 +272,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "from IPython.display import IFrame"
@@ -317,14 +290,14 @@
        "        <iframe\n",
        "            width=\"600\"\n",
        "            height=\"300\"\n",
-       "            src=\"./example/training-19Sep2018-1402/acc_epochs.pdf\"\n",
+       "            src=\"./example/training-23Oct2018-2325/acc_epochs.pdf\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x1c37ac1150>"
+       "<IPython.lib.display.IFrame at 0x1a3bb66dd0>"
       ]
      },
      "execution_count": 9,
@@ -348,14 +321,14 @@
        "        <iframe\n",
        "            width=\"600\"\n",
        "            height=\"300\"\n",
-       "            src=\"./example/training-19Sep2018-1402/loss_epochs.pdf\"\n",
+       "            src=\"./example/training-23Oct2018-2325/loss_epochs.pdf\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x1c3a816050>"
+       "<IPython.lib.display.IFrame at 0x1096eeb90>"
       ]
      },
      "execution_count": 10,
@@ -379,14 +352,14 @@
        "        <iframe\n",
        "            width=\"600\"\n",
        "            height=\"300\"\n",
-       "            src=\"./example/training-19Sep2018-1402/roc_curve.pdf\"\n",
+       "            src=\"./example/training-23Oct2018-2325/roc_curve.pdf\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x1c3e187850>"
+       "<IPython.lib.display.IFrame at 0x1a3bb2cfd0>"
       ]
      },
      "execution_count": 11,
@@ -410,14 +383,14 @@
        "        <iframe\n",
        "            width=\"600\"\n",
        "            height=\"300\"\n",
-       "            src=\"./example/training-19Sep2018-1402/hist_DNN_prediction.pdf\"\n",
+       "            src=\"./example/training-23Oct2018-2325/hist_DNN_prediction.pdf\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x1c3b6cb410>"
+       "<IPython.lib.display.IFrame at 0x1a30f69090>"
       ]
      },
      "execution_count": 12,
@@ -432,9 +405,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
@@ -455,7 +426,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.11"
+   "version": "2.7.15"
   }
  },
  "nbformat": 4,

diff --git a/python/empire.py b/python/empire.py
@@ -49,6 +49,7 @@ def __init__(self):
         self.sample_labels   = {}                               # Formatted sample labels
         self.variable_labels = {}                               # Formatted variable labels
 
+        self.backend      = 'uproot'        # backend for hepPlotter
         self.msg_svc      = util.VERBOSE()  # 'level' for printing statements
         self.output_dir   = ''              # directory/path to store the plots
         self.image_format = 'pdf'           # figure format (PDF matches with backend!)
@@ -103,6 +104,7 @@ def feature(self,dataframe,ndims=-1):
 
             hist = Histogram1D()
 
+            hist.backend = self.backend
             hist.normed  = True
             hist.stacked = False
             hist.binning = vl.binning
@@ -181,6 +183,7 @@ def feature(self,dataframe,ndims=-1):
 
                 hist = Histogram2D()
 
+                hist.backend  = self.backend
                 hist.colormap = 'default'
                 hist.colorbar['title'] = "Events"
 
@@ -304,8 +307,8 @@ def separation(self):
             separations = [self.separations['-'.join(f)]['-'.join(target)] for f in listOfFeaturePairs]
 
             # Now repeat the entries with flipped indices to get the full matrix
-            x = list(x_coord)+list(y_coord)
-            y = list(y_coord)+list(x_coord)
+            x = np.asarray(list(x_coord)+list(y_coord))
+            y = np.asarray(list(y_coord)+list(x_coord))
             separations += separations
 
             # make the plot
@@ -314,6 +317,7 @@ def separation(self):
             hist.colormap = 'default'
             hist.colorbar['title'] = "Separation"
 
+            hist.backend = self.backend
             hist.x_label = "{0} - {1}".format(target_a_label,target_b_label)
             hist.y_label = ''
             hist.binning = [range(nfeatures+1),range(nfeatures+1)]
@@ -323,7 +327,7 @@ def separation(self):
             hist.CMSlabelStatus = self.CMSlabelStatus
 
             hist.initialize()
-            hist.Add([x,y],weights=separations,name='-'.join(target))
+            hist.Add([x,y],weights=np.asarray(separations),name='-'.join(target))
 
             fig = hist.execute()
 
@@ -414,6 +418,7 @@ def _prediction(self,train_data={},test_data={},c=None):
 
         hist = Histogram1D()
 
+        hist.backend = self.backend
         hist.normed  = True  # compare shape differences (likely don't have the same event yield)
         hist.format  = self.image_format
         hist.saveAs  = "{0}/hist_DNN_prediction{1}".format(self.output_dir,target_name)

diff --git a/python/foundation.py b/python/foundation.py
@@ -70,6 +70,7 @@ def __init__(self):
         self.runDiagnostics = False     # Make plots pre/post training
         self.msg_svc = None
         self.verbose = True
+        self.backend = 'uproot'         # backend for making plots with hepPlotter
         self.equal_statistics = True    # Equal statistics for each class in the df
 
 
@@ -92,6 +93,7 @@ def initialize(self):
 
         ## -- Plotting framework
         self.plotter = Empire()  # class for plotting relevant NN information
+        self.plotter.backend      = self.backend
         self.plotter.output_dir   = self.output_dir
         self.plotter.image_format = 'pdf'
         self.plotter.features     = self.features