Skip to content

Commit

Permalink
Add support for exporting multiple notebooks
Browse files Browse the repository at this point in the history
Add support for exporting multiple notebooks by providing
multiple --export parameters.

Fixes #3
Closes #5
  • Loading branch information
lresende committed Mar 24, 2018
1 parent 9519fba commit 54142c6
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 15 deletions.
24 changes: 20 additions & 4 deletions notebook-exporter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

Enables exporting an Apache Zeppelin or Jupyter Notebook as an application that can be directly submitted to an Apache Spark cluster.

Exporting a Zeppelin Notebook
### Exporting a Zeppelin Notebook

```
java -jar ./target/scala-2.11/notebook-exporter.jar \
Expand All @@ -13,7 +13,7 @@ java -jar ./target/scala-2.11/notebook-exporter.jar \
$SPARK_HOME/bin/spark-submit --class NotebookApplication target/sample-zeppelin.jar
```

Exporting a Jupyter Notebook
### Exporting a Jupyter Notebook

```
java -jar ./target/scala-2.11/notebook-exporter.jar \
Expand All @@ -23,17 +23,33 @@ java -jar ./target/scala-2.11/notebook-exporter.jar \
$SPARK_HOME/bin/spark-submit --class NotebookApplication target/sample-jupyter.jar
```

### Exporting multiple notebooks

The `notebook-exporter` tool also supports processing multiple notebooks. In this scenario, notebooks
will be processed independently in the order they are provided.

To provide multiple notebooks, pass one `--export` parameter per notebook:

```
java -jar ./target/scala-2.11/notebook-exporter.jar \
--type=jupyter \
--export src/main/resources/notebooks/jupyter/sample1.ipynb \
--export src/main/resources/notebooks/jupyter/sample2.ipynb \
--to target/sample-jupyter.jar
$SPARK_HOME/bin/spark-submit --class NotebookApplication target/sample-jupyter.jar
```


# Development information

## Building the project
### Building the project

Notebook Tools is a Scala application built with SBT.

```
sbt clean compile assembly
```

# Other information
## Other information

This tool was developed as part of the Operationalization efforts of the [Spark Technology Center Advisory Council](http://www.spark.tc/advisory-council/) and in close collaboration with [Rohan Sharma](https://github.com/rohancs) representing [Netflix](http://netflix.com).
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ object Application {

val usage =
"""
Usage: java -jar exporter.jar --export notebook.json application.jar
Usage: java -jar exporter.jar --export notebook.json --to application.jar
"""


Expand Down Expand Up @@ -62,19 +62,20 @@ object Application {
// currently only supporting export action
// --export notebook.json [--export another-notebook.json ...] --to application.jar
val notebookType = parameters.get(CommandLineOption.notebookType)
val notebookLocation =
Paths.get(parameters.get(CommandLineOption.export)).toAbsolutePath.toString
val notebookLocations = parameters.getAll(CommandLineOption.export).map(
p => Paths.get(p).toAbsolutePath.toString
)
val targetApplicationLocation =
Paths.get(parameters.get(CommandLineOption.to)).toAbsolutePath.toString

println(notebookLocation)
println(notebookLocations)
println(targetApplicationLocation)

if (notebookType.equalsIgnoreCase("zeppelin")) {
var notebook = ZeppelinNotebook(notebookLocation.toString)
var notebook = ZeppelinNotebook(notebookLocations)
NotebookExporter.export(notebook, "NotebookApplication.scala", targetApplicationLocation)
} else if (notebookType.equalsIgnoreCase("jupyter")) {
var notebook = JupyterNotebook(notebookLocation.toString)
var notebook = JupyterNotebook(notebookLocations)
NotebookExporter.export(notebook, "NotebookApplication.scala", targetApplicationLocation)
} else {
println("Invlid notebook type :" + notebookType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ class CommandLineParser(args: Seq[String]) {

/**
 * Returns every value recorded for the option `key`, as reported by
 * `options.valuesOf(key)`, converted to an immutable Scala list.
 *
 * @param key name of the command line option to look up
 * @return the values of the option, in the order `valuesOf` yields them
 */
def getAll(key: String): List[String] = {
  // JavaConverters replaces the deprecated JavaConversions helpers; the
  // import is kept local so the rest of the file is unaffected.
  import scala.collection.JavaConverters._

  // The cast is carried over from the original implementation: the element
  // type is not known statically here.
  // NOTE(review): assumes every value of this option is a String - confirm.
  options.valuesOf(key).asScala.toList.asInstanceOf[List[String]]
}

def has[T](spec: OptionSpec[T]): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,21 @@ case class Paragraph (id: String, title: Option[String], text: String)

object ZeppelinNotebook {

def apply(notebookPath: String): Notebook = ({
/**
 * Builds a [[Notebook]] from a single notebook file by delegating to
 * `parseNotebook`.
 *
 * @param notebookPath location of the notebook file to read
 * @return the notebook produced by `parseNotebook` for that file
 */
def apply(notebookPath: String): Notebook = parseNotebook(notebookPath)

/**
 * Combines several notebook files into one pipeline notebook.
 *
 * Every path is parsed with `parseNotebook` and the paragraphs of the
 * resulting notebooks are concatenated in the order the paths were given.
 * An empty list of paths yields a notebook without paragraphs.
 *
 * @param multipleNotebookPaths locations of the notebooks to combine, in
 *                              processing order
 * @return a notebook created as `Notebook("Notebook Pipeline", "1", ...)`
 *         holding the paragraphs of all input notebooks
 * @throws IOException if one of the notebook files does not exist
 */
def apply(multipleNotebookPaths: List[String]): Notebook = {
  // flatMap preserves the input order and avoids re-copying the accumulated
  // list for every notebook, which the former `acc ::: next` loop did.
  val paragraphs: List[Paragraph] =
    multipleNotebookPaths.flatMap(path => parseNotebook(path).paragraphs)

  Notebook("Notebook Pipeline", "1", paragraphs)
}

def parseNotebook(notebookPath: String): Notebook = ({
if (Files.exists(Paths.get(notebookPath)) == false) {
throw new IOException("Notebook does not exist: '" + notebookPath + "'")
}
Expand All @@ -50,8 +64,23 @@ case class JypnbParagraph (cell_type: String, source: List[String])

object JupyterNotebook {

def apply(notebookPath: String): Notebook = ({
/**
 * Builds a [[Notebook]] from a single notebook file by delegating to
 * `parseNotebook`.
 *
 * @param notebookPath location of the notebook file to read
 * @return the notebook produced by `parseNotebook` for that file
 */
def apply(notebookPath: String): Notebook = parseNotebook(notebookPath)


/**
 * Combines several notebook files into one pipeline notebook.
 *
 * Every path is parsed with `parseNotebook` and the paragraphs of the
 * resulting notebooks are concatenated in the order the paths were given.
 * An empty list of paths yields a notebook without paragraphs.
 *
 * @param multipleNotebookPaths locations of the notebooks to combine, in
 *                              processing order
 * @return a notebook created as `Notebook("Notebook Pipeline", "1", ...)`
 *         holding the paragraphs of all input notebooks
 * @throws IOException if one of the notebook files does not exist
 */
def apply(multipleNotebookPaths: List[String]): Notebook = {
  // flatMap preserves the input order and avoids re-copying the accumulated
  // list for every notebook, which the former `acc ::: next` loop did.
  val paragraphs: List[Paragraph] =
    multipleNotebookPaths.flatMap(path => parseNotebook(path).paragraphs)

  Notebook("Notebook Pipeline", "1", paragraphs)
}


private def parseNotebook(notebookPath: String): Notebook = ({
if (Files.exists(Paths.get(notebookPath)) == false) {
throw new IOException("Notebook does not exist: '" + notebookPath + "'")
}
Expand All @@ -62,7 +91,7 @@ object JupyterNotebook {
val jupyterNotebook = jsonNote.extract[JypnbNotebook]

var id = 0
var paragraphs = List[Paragraph]()
var paragraphs: List[Paragraph] = List[Paragraph]()
jupyterNotebook.cells.foreach(cell => {
if (cell.cell_type.equalsIgnoreCase("code")) {
id += 1
Expand Down
54 changes: 54 additions & 0 deletions notebook-exporter/src/test/resources/notebooks/jupyter/hello.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2018-03-24T06:51:36.995868Z",
"start_time": "2018-03-24T06:51:36.683Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"msg: String = hello\n",
"user: String = john\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"val msg = \"hello\"\n",
"val user = \"john\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Spark 2.2.0 - Scala",
"language": "scala",
"name": "spark_2.2.0_scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "2.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(msg + \" \" + user)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Spark 2.2.0 - Scala",
"language": "scala",
"name": "spark_2.2.0_scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"pygments_lexer": "scala",
"version": "2.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,23 @@ class CommandLineParserSpec extends FlatSpec with Matchers {
export should be ("notebook.json")
to should be ("application.jar")
}

it should "properly parse export commandline option with multiple notebooks" in {
  // Two --export occurrences followed by a single --to target.
  val commandLine = List(
    "--export", "notebook1.json",
    "--export", "notebook2.json",
    "--to", "application.jar")

  val parser = new CommandLineParser(commandLine)

  // getAll collects every value given for a repeated option.
  val exportedNotebooks = parser.getAll("export")
  val targetJar = parser.get("to")

  println(exportedNotebooks) //scalastyle:ignore
  println(targetJar) //scalastyle:ignore

  exportedNotebooks should be(List[String]("notebook1.json", "notebook2.json"))
  targetJar should be("application.jar")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,15 @@ class NotebookExporterSpec extends FlatSpec {
"NotebookApplication.scala",
"target/jupyter-generated-application.jar")
}

it should "generate a valid jar file from multiple Jupyter Notebook" in {
  // Resolve the fixture notebooks from the test classpath, keeping the order
  // in which they have to be processed.
  val notebookPaths = List(
    "/notebooks/jupyter/hello.ipynb",
    "/notebooks/jupyter/helloworld.ipynb"
  ).map(resourceName => getClass().getResource(resourceName).getPath)

  val combinedNotebook = JupyterNotebook(notebookPaths)

  NotebookExporter.export(
    combinedNotebook,
    "NotebookApplication.scala",
    "target/jupyter-generated-application.jar")
}
}

0 comments on commit 54142c6

Please sign in to comment.