diff --git a/.appveyor.yml b/.appveyor.yml
index e14cc62a08e..52395be924a 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -17,17 +17,33 @@
# under the License.
#
-version: '1.0.0-dev.{build}'
+version: '0.12.0.{build}'
shallow_clone: true
-build: off
+branches:
+ except:
+ - /dependabot/
+
+platform:
+ - x64
+
+build: Script
os:
- - Visual Studio 2015
+ - Visual Studio 2022
+
+environment:
+ APPVEYOR_SAVE_CACHE_ON_ERROR: True
-install:
- - echo "Install"
+cache:
+ - '%USERPROFILE%/.m2'
build_script:
- - echo "Build"
+ - cmd: set JAVA_HOME=C:\Program Files\Java\jdk1.8.0
+ - cmd: >-
+ ./mvnw.cmd clean package -DskipTests ^
+ -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade.mojo.ShadeMojo=warn ^
+ -Dorg.slf4j.simpleLogger.log.com.googlecode.download.maven.plugin.internal.WGet=warn ^
+ -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency.fromDependencies.CopyDependenciesMojo=warn ^
+ --no-transfer-progress
diff --git a/.asf.yaml b/.asf.yaml
index 99a643f17a8..44528f62823 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-# https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories
+# https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features
github:
description: "Web-based notebook that enables data-driven, interactive data analytics and collaborative documents with SQL, Scala and more."
@@ -30,3 +30,20 @@ github:
- big-data
- zeppelin
- javascript
+ enabled_merge_buttons:
+ merge: false
+ squash: true
+ rebase: false
+ protected_branches:
+ master:
+ required_pull_request_reviews:
+ dismiss_stale_reviews: true
+ required_approving_review_count: 1
+ autolink_jira:
+ - ZEPPELIN
+
+notifications:
+ commits: commits@zeppelin.apache.org
+ issues: reviews@zeppelin.apache.org
+ pullrequests: reviews@zeppelin.apache.org
+ jira_options: link label
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000000..cbdcbbc258e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.js text eol=lf
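
The new attribute only takes effect when files are (re)written by Git; a minimal sketch for renormalizing an existing working copy, assuming Git 2.16 or newer:

    # re-apply .gitattributes to already-tracked files so *.js is stored with LF endings
    git add --renormalize .
    git status        # review which JavaScript files were converted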
diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
index addaf21cbb3..25167143d03 100644
--- a/.github/PULL_REQUEST_TEMPLATE
+++ b/.github/PULL_REQUEST_TEMPLATE
@@ -4,7 +4,13 @@ First time? Check out the contributing guide - https://zeppelin.apache.org/contr
### What type of PR is it?
-[Bug Fix | Improvement | Feature | Documentation | Hot Fix | Refactoring]
+Bug Fix
+Improvement
+Feature
+Documentation
+Hot Fix
+Refactoring
+*Please keep only the type that applies to this PR*

### Todos
* [ ] - Task
@@ -20,6 +26,6 @@ First time? Check out the contributing guide - https://zeppelin.apache.org/contr
### Screenshots (if appropriate)
### Questions:
-* Does the licenses files need update?
+* Do the license files need to be updated?
* Is there breaking changes for older versions?
* Does this needs documentation?
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index fdf28ff0e64..b7004b15daa 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -1,20 +1,24 @@
name: core
+
on:
push:
+ branches-ignore:
+ - 'dependabot/**'
pull_request:
branches:
- master
- - branch-*
- types: [opened, synchronize]
+ - 'branch-*'
env:
# Disable keepAlive and pool
# https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
MAVEN_OPTS: >-
- -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
+ -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit
-Dhttp.keepAlive=false
-Dmaven.wagon.http.pool=false
-Dmaven.wagon.http.retryHandler.count=3
+ MAVEN_ARGS: >-
+ -B --no-transfer-progress
ZEPPELIN_HELIUM_REGISTRY: helium
SPARK_PRINT_LAUNCH_COMMAND: "true"
SPARK_LOCAL_IP: 127.0.0.1
@@ -25,383 +29,470 @@ defaults:
run:
shell: bash -l {0}
+permissions:
+ contents: read # to fetch code (actions/checkout)
+
jobs:
- test-core-modules:
+ # Test the core modules (zeppelin-interpreter, zeppelin-zengine, zeppelin-server).
+ # Some interpreters are included because the zeppelin-server tests depend on them: spark, shell & markdown.
+ core-modules:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
- hadoop: [hadoop2, hadoop3]
+ hadoop: [hadoop3]
+ java: [ 8, 11 ]
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
+ ~/conda_pkgs_dir
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: install application with some interpreters
+ run: ./mvnw install -Pbuild-distr -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} ${MAVEN_ARGS}
+ - name: install and test plugins
+ run: ./mvnw package -pl zeppelin-plugins -amd ${MAVEN_ARGS}
+ - name: Setup conda environment with python 3.9 and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ environment-file: testing/env_python_3.9_with_R.yml
+ python-version: 3.9
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
- channel-priority: strict
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
conda list
conda info
- - name: install application with some interpreter
- run: mvn install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B
- - name: install and test plugins
- run: mvn package -DskipRat -pl zeppelin-plugins -amd -B
- - name: run tests with ${{ matrix.hadoop }}
- run: mvn verify -Pusing-packaged-distr -DskipRat -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false
- test-interpreter-modules:
+ - name: run tests with ${{ matrix.hadoop }} # skip spark tests because they run in a separate CI job
+ run: ./mvnw verify -Pusing-packaged-distr -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false
+
+ # test interpreter modules except spark, flink, python, rlang, jupyter
+ interpreter-test-non-core:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ java: [ 8, 11 ]
env:
- INTERPRETERS: 'beam,hbase,pig,jdbc,file,flink,flink-cmd,ignite,kylin,lens,cassandra,elasticsearch,bigquery,alluxio,scio,livy,groovy,sap,java,geode,neo4j,hazelcastjet,submarine,sparql,mongodb'
+ INTERPRETERS: 'hbase,jdbc,file,flink-cmd,cassandra,elasticsearch,bigquery,alluxio,livy,groovy,java,neo4j,sparql,mongodb,influxdb,shell'
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: install environment
+ run: ./mvnw install -DskipTests -am -pl ${INTERPRETERS} ${MAVEN_ARGS}
+ - name: Setup conda environment with python 3.9 and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R_and_tensorflow
environment-file: testing/env_python_3_with_R_and_tensorflow.yml
- python-version: 3.7
+ python-version: 3.9
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
- - name: Make IRkernel available to Jupyter
- run: |
- R -e "IRkernel::installspec()"
+ use-mamba: true
- name: verify interpreter
- run: mvn verify -DskipRat -am -pl .,zeppelin-interpreter,zeppelin-interpreter-shaded,${INTERPRETERS} -Pscala-2.10 -B
- test-zeppelin-client-integration-test:
+ run: ./mvnw verify -am -pl ${INTERPRETERS} ${MAVEN_ARGS}
+
+ # test interpreter modules for jupyter, python, rlang
+ interpreter-test-jupyter-python-rlang:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python: [ 3.9 ]
+ java: [ 8, 11 ]
+ include:
+ - python: 3.7
+ java: 8
+ - python: 3.8
+ java: 8
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: Setup conda environment with python ${{ matrix.python }} and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ environment-file: testing/env_python_${{ matrix.python }}_with_R.yml
+ python-version: ${{ matrix.python }}
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: install environment
run: |
- mvn install -DskipTests -DskipRat -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am
- mvn package -DskipRat -pl zeppelin-plugins -amd -DskipTests -B
- - name: run tests
- run: mvn test -DskipRat -pl zeppelin-interpreter-integration -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest
- test-flink-and-flink-integration-test:
+ ./mvnw install -DskipTests -pl python,rlang,zeppelin-jupyter-interpreter -am -Phadoop3 ${MAVEN_ARGS}
+ - name: run tests with ${{ matrix.python }}
+ run: |
+ ./mvnw test -pl python,rlang,zeppelin-jupyter-interpreter -DfailIfNoTests=false ${MAVEN_ARGS}
+
+ # zeppelin integration test except Spark & Flink
+ zeppelin-integration-test:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
- flink: [110, 111, 112, 113]
+ java: [ 8, 11 ]
steps:
+ # user/password => root/root
+ - name: Start mysql
+ run: sudo systemctl start mysql.service
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and
- uses: conda-incubator/setup-miniconda@v2
- with:
- activate-environment: python_3_with_flink
- environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml
- python-version: 3.7
- auto-activate-base: false
- name: install environment
run: |
- mvn install -DskipTests -DskipRat -am -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -B
- mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
- - name: run tests
- run: mvn test -DskipRat -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -DfailIfNoTests=false -B -Dtest=org.apache.zeppelin.flink.*,FlinkIntegrationTest${{ matrix.flink }},ZeppelinFlinkClusterTest${{ matrix.flink }}
- run-spark-intergration-test:
- runs-on: ubuntu-20.04
- steps:
- - name: Checkout
- uses: actions/checkout@v2
- - name: Tune Runner VM
- uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
- with:
- distribution: 'adopt'
- java-version: 8
- - name: Cache local Maven repository
- uses: actions/cache@v2
- with:
- path: |
- ~/.m2/repository
- !~/.m2/repository/org/apache/zeppelin/
- key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ ./mvnw install -DskipTests -Phadoop3 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,flink-cmd,flink/flink-scala-2.12,jdbc,shell -am -Pweb-classic -Pflink-117 ${MAVEN_ARGS}
+ ./mvnw package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
+ - name: Setup conda environment with python 3.9 and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ python-version: 3.9
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- - name: install environment
- run: |
- mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
- mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
- name: run tests
- run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,SparkIntegrationTest30 -DfailIfNoTests=false
- jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11:
+ run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest,ShellIntegrationTest,JdbcIntegrationTest
+ - name: Print zeppelin logs
+ if: always()
+ run: if [ -d "logs" ]; then cat logs/*; fi
+
+ flink-test-and-flink-integration-test:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python: [ 3.9 ]
+ flink: [116, 117]
+ include:
+ # Flink 1.15 supports Python 3.6, 3.7, and 3.8
+ # https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/python/installation/
+ - python: 3.8
+ flink: 115
steps:
- # user/password => root/root
- - name: Start mysql
- run: sudo systemctl start mysql.service
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK 8
- uses: actions/setup-java@v2
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
+ distribution: 'temurin'
java-version: 8
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: install environment for flink
+ run: |
+ ./mvnw install -DskipTests -am -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Phadoop3 -Pintegration ${MAVEN_ARGS}
+ ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
+ - name: Setup conda environment with python ${{ matrix.python }} and R
+ uses: conda-incubator/setup-miniconda@v3
with:
- activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ activate-environment: python_3_with_flink
+ environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml
+ python-version: ${{ matrix.python }}
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
- - name: Make IRkernel available to Jupyter
- run: |
- R -e "IRkernel::installspec()"
- - name: install environment
- run: |
- mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B
- mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
- - name: run tests
- run: mvn test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
+ use-mamba: true
+ - name: run tests for flink
+ run: ./mvnw verify -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -am -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=org.apache.zeppelin.flink.*Test,FlinkIntegrationTest${{ matrix.flink }} ${MAVEN_ARGS}
+ - name: Print zeppelin logs
+ if: always()
+ run: if [ -d "logs" ]; then cat logs/*; fi
- spark-2-4-and-scale-2-12:
+
+ spark-integration-test:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ hadoop: [ 3 ]
+ java: [ 8, 11 ]
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: install environment
+ run: |
+ ./mvnw install -DskipTests -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown -am -Pweb-classic -Phadoop3 -Pintegration ${MAVEN_ARGS}
+ ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
+ - name: Setup conda environment with python 3.9 and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ python-version: 3.9
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- - name: install environment
- run: |
- mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B
- - name: run tests
- run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
+ - name: run tests on hadoop${{ matrix.hadoop }}
+ run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop${{ matrix.hadoop }} -Pintegration -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest32,SparkIntegrationTest32,ZeppelinSparkClusterTest33,SparkIntegrationTest33 -DfailIfNoTests=false ${MAVEN_ARGS}
- spark-2-3-and-scale-2-11-and-other-interpreter:
+ # test on spark for each spark version & scala version
+ spark-test:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ python: [ 3.9 ]
+ java: [ 8, 11 ]
+ include:
+ - python: 3.7
+ java: 8
+ - python: 3.8
+ java: 8
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: install environment
+ run: ./mvnw install -DskipTests -pl spark-submit,spark/scala-2.12,spark/scala-2.13 -am -Phadoop3 ${MAVEN_ARGS}
+ - name: Setup conda environment with python ${{ matrix.python }} and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ environment-file: testing/env_python_${{ matrix.python }}_with_R.yml
+ python-version: ${{ matrix.python }}
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- - name: install environment
+ - name: run spark-3.3 tests with scala-2.12 and python-${{ matrix.python }}
run: |
- mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B
- - name: run tests
- run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+ rm -rf spark/interpreter/metastore_db
+ ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.12 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
+ - name: run spark-3.3 tests with scala-2.13 and python-${{ matrix.python }}
+ run: |
+ rm -rf spark/interpreter/metastore_db
+ ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
+ - name: run spark-3.4 tests with scala-2.13 and python-${{ matrix.python }}
+ run: |
+ rm -rf spark/interpreter/metastore_db
+ ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.4 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
+ - name: run spark-3.5 tests with scala-2.13 and python-${{ matrix.python }}
+ if: matrix.python >= '3.8'
+ run: |
+ rm -rf spark/interpreter/metastore_db
+ ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.5 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- spark-2-2-and-scale-2-10-and-other-interpreter:
+ # The version combination is based on the following facts:
+ # 1. the official Livy 0.8 binary tarball is built against Spark 2.4
+ # 2. the official Spark 2.4 binary tarball is built against Scala 2.11
+ # 3. Spark 2.4 supports Python 2.7 and 3.4 to 3.7
+ livy-0-8-with-spark-2-4-under-python37:
runs-on: ubuntu-20.04
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK 8
- uses: actions/setup-java@v2
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
+ distribution: 'temurin'
java-version: 8
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
+ - name: install environment
+ run: |
+ ./mvnw install -DskipTests -pl livy -am ${MAVEN_ARGS}
+ ./testing/downloadSpark.sh "2.4.8" "2.7"
+ ./testing/downloadLivy.sh "0.8.0-incubating" "2.11"
- name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v3
with:
- activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
+ activate-environment: python_37_with_R
+ environment-file: testing/env_python_3.7_with_R.yml
python-version: 3.7
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- - name: install environment
- run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B
- name: run tests
- run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
- test-livy-0-5-with-spark-2-2-0-under-python3:
+ run: |
+ export SPARK_HOME=$PWD/spark-2.4.8-bin-hadoop2.7
+ export LIVY_HOME=$PWD/apache-livy-0.8.0-incubating_2.11-bin
+ ./mvnw verify -pl livy -am ${MAVEN_ARGS}
+
+ default-build:
runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ java: [ 8, 11 ]
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK ${{ matrix.java }}
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: ${{ matrix.java }}
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
- with:
- activate-environment: python_3_with_R
- environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
- auto-activate-base: false
- - name: Make IRkernel available to Jupyter
- run: |
- R -e "IRkernel::installspec()"
- - name: install environment
- run: |
- mvn install -DskipTests -DskipRat -pl livy -am -B
- ./testing/downloadSpark.sh "2.2.0" "2.6"
- ./testing/downloadLivy.sh "0.5.0-incubating"
- - name: run tests
- run: mvn verify -DskipRat -pl livy -am -B
+ - name: build without any profiles
+ run: ./mvnw clean verify -DskipTests ${MAVEN_ARGS}
diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml
index 9fb1f8b539c..3ebc68594df 100644
--- a/.github/workflows/frontend.yml
+++ b/.github/workflows/frontend.yml
@@ -1,114 +1,137 @@
name: frontend
+
on:
push:
+ branches-ignore:
+ - 'dependabot/**'
pull_request:
branches:
- master
- - branch-*
- types: [opened, synchronize]
+ - 'branch-*'
env:
# Disable keepAlive and pool
# https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
MAVEN_OPTS: >-
- -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
+ -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit
-Dhttp.keepAlive=false
-Dmaven.wagon.http.pool=false
-Dmaven.wagon.http.retryHandler.count=3
+ MAVEN_ARGS: >-
+ -B --no-transfer-progress
ZEPPELIN_HELIUM_REGISTRY: helium
SPARK_PRINT_LAUNCH_COMMAND: "true"
SPARK_LOCAL_IP: 127.0.0.1
ZEPPELIN_LOCAL_IP: 127.0.0.1
- INTERPRETERS: '!beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb'
+ INTERPRETERS: '!hbase,!jdbc,!file,!flink,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!java,!neo4j,!sparql,!mongodb'
+
+permissions:
+ contents: read # to fetch code (actions/checkout)
jobs:
run-e2e-tests-in-zeppelin-web:
runs-on: ubuntu-20.04
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK 11
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: 11
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: Install application
- run: mvn -B install -DskipTests -DskipRat -pl ${INTERPRETERS} -Phadoop2 -Pscala-2.11
+ run: ./mvnw clean install -DskipTests -am -pl zeppelin-web -Pweb-classic -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist ${MAVEN_ARGS}
- name: Run headless test
- run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" mvn verify -DskipRat -pl zeppelin-web -Phadoop2 -Pscala-2.11 -Pweb-e2e -B
+ run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw verify -pl zeppelin-web -Pweb-classic -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist -Pweb-e2e ${MAVEN_ARGS}
+ - name: Print zeppelin logs
+ if: always()
+ run: if [ -d "logs" ]; then cat logs/*; fi
+
run-tests-in-zeppelin-web-angular:
runs-on: ubuntu-20.04
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK 11
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: 11
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: Run headless test
- run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" mvn package -DskipRat -pl zeppelin-web-angular -Pweb-angular -B
+ run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw package -pl zeppelin-web-angular ${MAVEN_ARGS}
- test-selenium-with-spark-module-for-spark-2-3:
+ test-selenium-with-spark-module-for-spark-3-4:
runs-on: ubuntu-20.04
defaults:
run:
shell: bash -l {0}
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- - name: Set up JDK 8
- uses: actions/setup-java@v2
+ - name: Set up JDK 11
+ uses: actions/setup-java@v4
with:
- distribution: 'adopt'
- java-version: 8
+ distribution: 'temurin'
+ java-version: 11
- name: Cache local Maven repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
+ ~/.spark-dist
+ ~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- - name: Setup conda environment with python 3.7 and R
- uses: conda-incubator/setup-miniconda@v2
+ - name: Setup conda environment with python 3.9 and R
+ uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3_with_R.yml
- python-version: 3.7
+ python-version: 3.9
+ miniforge-variant: Mambaforge
+ channels: conda-forge,defaults
+ channel-priority: true
auto-activate-base: false
+ use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- - name: install environment
+ - name: Install Environment
run: |
- mvn clean install -DskipTests -DskipRat -pl ${INTERPRETERS} -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B
- mvn clean package -pl zeppelin-plugins -amd -B
- ./testing/downloadSpark.sh "2.3.2" "2.6"
+ ./mvnw clean install -DskipTests -am -pl zeppelin-integration -Pweb-classic -Pintegration -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist ${MAVEN_ARGS}
- name: run tests
- run: xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" mvn verify -DskipRat -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B -pl zeppelin-integration -DfailIfNoTests=false
+ run: |
+ xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" ./mvnw verify -DfailIfNoTests=false -pl zeppelin-integration -Pweb-classic -Pintegration -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist -Pusing-source-tree ${MAVEN_ARGS}
+ - name: Print zeppelin logs
+ if: always()
+ run: if [ -d "logs" ]; then cat logs/*; fi
diff --git a/.github/workflows/quick.yml b/.github/workflows/quick.yml
new file mode 100644
index 00000000000..b26f015c6b4
--- /dev/null
+++ b/.github/workflows/quick.yml
@@ -0,0 +1,54 @@
+name: quick
+
+on:
+ push:
+ branches-ignore:
+ - 'dependabot/**'
+ pull_request:
+ branches:
+ - master
+ - 'branch-*'
+
+permissions:
+ contents: read
+
+env:
+ # Disable keepAlive and pool
+ # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
+ MAVEN_OPTS: >-
+ -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit
+ -Dhttp.keepAlive=false
+ -Dmaven.wagon.http.pool=false
+ -Dmaven.wagon.http.retryHandler.count=3
+ MAVEN_ARGS: >-
+ -B --no-transfer-progress
+
+jobs:
+ license-check:
+ runs-on: ubuntu-20.04
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Set up JDK 11
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: 11
+ - name: Check Rat
+ run: ./mvnw apache-rat:check -Prat ${MAVEN_ARGS}
+ maven-validate:
+ runs-on: ubuntu-20.04
+ strategy:
+ fail-fast: false
+ matrix:
+ hadoop: [hadoop3]
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Set up JDK 11
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: 11
+ - name: Run Maven Validate
+ run: ./mvnw validate -P${{ matrix.hadoop }} -Pinclude-hadoop ${MAVEN_ARGS}
diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml
deleted file mode 100644
index b32cb598e86..00000000000
--- a/.github/workflows/rat.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: rat
-on:
- push:
- pull_request:
- branches:
- - master
- - branch-*
- types: [opened, synchronize]
-
-jobs:
- license-check:
- runs-on: ubuntu-20.04
- env:
- # Disable keepAlive and pool
- # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
- MAVEN_OPTS: >-
- -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
- -Dhttp.keepAlive=false
- -Dmaven.wagon.http.pool=false
- -Dmaven.wagon.http.retryHandler.count=3
- steps:
- - name: Checkout
- uses: actions/checkout@v2
- - name: Set up JDK 8
- uses: actions/setup-java@v2
- with:
- distribution: 'adopt'
- java-version: 8
- - name: Check Rat
- run: mvn apache-rat:check -Prat -B
diff --git a/.gitignore b/.gitignore
index 618075d1de7..61ae7eb6a67 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,13 +10,16 @@
/interpreter/*
!/interpreter/lib
+# metals
+.bloop
+.metals
+
# interpreter temp files
derby.log
spark/metastore_db
spark-1.*-bin-hadoop*
.spark-dist
-lens/lens-cli-hist.log
# Zeppelin server
zeppelin-server/local-repo
@@ -31,7 +34,6 @@ conf/keystore
conf/truststore
conf/interpreter.json
conf/notebook-authorization.json
-conf/shiro.ini
conf/credentials.json
conf/helium.json
@@ -56,8 +58,6 @@ zeppelin-web/yarn.lock
.Rhistory
/R/
-# scio
-.bigquery/
# project level
/logs/
@@ -70,6 +70,7 @@ zeppelin-web/yarn.lock
/warehouse/
/notebook/
/local-repo/
+/notebook_*/
**/sessions/
**/data/
@@ -100,6 +101,12 @@ Thumbs.db
.idea/
*.iml
+# Jetbrains Fleet project files
+.fleet/
+
+# vscode project files
+.vscode/
+
# maven target files
target/
**/target/
@@ -107,7 +114,7 @@ target/
# maven flattened pom files
**/.flattened-pom.xml
-# Generated by Jekyll
+# Generated by Jekyll
docs/_site/
*~
@@ -129,3 +136,9 @@ tramp
# Git properties
**/git.properties
+
+# jEnv file
+.java-version
+
+# pyenv file
+.python-version
diff --git a/.mvn/wrapper/MavenWrapperDownloader.java b/.mvn/wrapper/MavenWrapperDownloader.java
new file mode 100644
index 00000000000..b901097f2db
--- /dev/null
+++ b/.mvn/wrapper/MavenWrapperDownloader.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2007-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.net.*;
+import java.io.*;
+import java.nio.channels.*;
+import java.util.Properties;
+
+public class MavenWrapperDownloader {
+
+ private static final String WRAPPER_VERSION = "0.5.6";
+ /**
+ * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
+ */
+ private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
+ + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
+
+ /**
+ * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
+ * use instead of the default one.
+ */
+ private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
+ ".mvn/wrapper/maven-wrapper.properties";
+
+ /**
+ * Path where the maven-wrapper.jar will be saved to.
+ */
+ private static final String MAVEN_WRAPPER_JAR_PATH =
+ ".mvn/wrapper/maven-wrapper.jar";
+
+ /**
+ * Name of the property which should be used to override the default download url for the wrapper.
+ */
+ private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
+
+ public static void main(String args[]) {
+ System.out.println("- Downloader started");
+ File baseDirectory = new File(args[0]);
+ System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
+
+ // If the maven-wrapper.properties exists, read it and check if it contains a custom
+ // wrapperUrl parameter.
+ File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
+ String url = DEFAULT_DOWNLOAD_URL;
+ if(mavenWrapperPropertyFile.exists()) {
+ FileInputStream mavenWrapperPropertyFileInputStream = null;
+ try {
+ mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
+ Properties mavenWrapperProperties = new Properties();
+ mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
+ url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
+ } catch (IOException e) {
+ System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
+ } finally {
+ try {
+ if(mavenWrapperPropertyFileInputStream != null) {
+ mavenWrapperPropertyFileInputStream.close();
+ }
+ } catch (IOException e) {
+ // Ignore ...
+ }
+ }
+ }
+ System.out.println("- Downloading from: " + url);
+
+ File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
+ if(!outputFile.getParentFile().exists()) {
+ if(!outputFile.getParentFile().mkdirs()) {
+ System.out.println(
+ "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
+ }
+ }
+ System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
+ try {
+ downloadFileFromURL(url, outputFile);
+ System.out.println("Done");
+ System.exit(0);
+ } catch (Throwable e) {
+ System.out.println("- Error downloading");
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ private static void downloadFileFromURL(String urlString, File destination) throws Exception {
+ if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
+ String username = System.getenv("MVNW_USERNAME");
+ char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
+ Authenticator.setDefault(new Authenticator() {
+ @Override
+ protected PasswordAuthentication getPasswordAuthentication() {
+ return new PasswordAuthentication(username, password);
+ }
+ });
+ }
+ URL website = new URL(urlString);
+ ReadableByteChannel rbc;
+ rbc = Channels.newChannel(website.openStream());
+ FileOutputStream fos = new FileOutputStream(destination);
+ fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+ fos.close();
+ rbc.close();
+ }
+
+}
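
This downloader is normally compiled and run by the mvnw scripts when .mvn/wrapper/maven-wrapper.jar is missing; a minimal manual sketch of the same step, following the constants defined in the class above:

    # compile the helper and let it fetch maven-wrapper.jar into .mvn/wrapper/
    javac .mvn/wrapper/MavenWrapperDownloader.java
    java -cp .mvn/wrapper MavenWrapperDownloader "$(pwd)"   # args[0] = project base directory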
diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar
new file mode 100644
index 00000000000..2cc7d4a55c0
Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ
diff --git a/scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh b/.mvn/wrapper/maven-wrapper.properties
similarity index 78%
rename from scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh
rename to .mvn/wrapper/maven-wrapper.properties
index 0eccdf4596b..7e83fe97d7d 100644
--- a/scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh
+++ b/.mvn/wrapper/maven-wrapper.properties
@@ -1,4 +1,3 @@
-#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
@@ -16,4 +15,5 @@
# limitations under the License.
#
-docker build -t zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2:1.0.0 .
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip
+wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
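
With the wrapper committed, contributors no longer need a preinstalled Maven; a minimal sketch of a local build using the same flags as the CI jobs above:

    # the first invocation downloads Apache Maven 3.8.1 from distributionUrl into the wrapper cache (~/.m2/wrapper by default)
    ./mvnw -v
    ./mvnw clean package -DskipTests -B --no-transfer-progress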
diff --git a/Dockerfile b/Dockerfile
index 28606ea4cfb..6f1777e0862 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,19 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-FROM maven:3.5-jdk-8 as builder
+FROM openjdk:11 as builder
ADD . /workspace/zeppelin
WORKDIR /workspace/zeppelin
+ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
# Allow npm and bower to run with root privileges
RUN echo "unsafe-perm=true" > ~/.npmrc && \
echo '{ "allow_root": true }' > ~/.bowerrc && \
- mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular && \
+ ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist && \
# Example with doesn't compile all interpreters
- # mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!pig,!file,!flink,!ignite,!kylin,!lens' && \
- mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \
+ # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist -pl '!groovy,!livy,!hbase,!file,!flink' && \
+ mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*-bin/zeppelin-*-bin /opt/zeppelin/ && \
# Removing stuff saves time, because docker creates a temporary layer
rm -rf ~/.m2 && \
rm -rf /workspace/zeppelin/*
-FROM ubuntu:20.04
+FROM ubuntu:22.04
COPY --from=builder /opt/zeppelin /opt/zeppelin
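
A minimal sketch of exercising this Dockerfile locally; the image tag is illustrative, and the run command assumes the runtime stage (not fully shown in this hunk) starts Zeppelin on its default web port 8080:

    # multi-stage build: the builder stage runs ./mvnw, the final stage is ubuntu:22.04
    docker build -t zeppelin:local .
    docker run --rm -p 8080:8080 zeppelin:local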
diff --git a/LICENSE b/LICENSE
index 7f759130ca3..2efdc5dbb05 100644
--- a/LICENSE
+++ b/LICENSE
@@ -265,7 +265,6 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(Apache 2.0) Google Cloud Client Library for Java (https://github.com/GoogleCloudPlatform/google-cloud-java)
(Apache 2.0) concurrentunit (https://github.com/jhalterman/concurrentunit)
(Apache 2.0) Embedded MongoDB (https://github.com/flapdoodle-oss/de.flapdoodle.embed.mongo)
- (Apache 2.0) Kotlin (https://github.com/JetBrains/kotlin)
(Apache 2.0) s3proxy (https://github.com/gaul/s3proxy)
(Apache 2.0) kubernetes-client (https://github.com/fabric8io/kubernetes-client)
diff --git a/NOTICE b/NOTICE
index 52de5bfc89f..63e12dbd6c8 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,8 +1,8 @@
Apache Zeppelin
-Copyright 2015 - 2016 The Apache Software Foundation
+Copyright 2015 - 2024 The Apache Software Foundation
This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
+The Apache Software Foundation (https://www.apache.org/).
Portions of this software were developed at NFLabs, Inc. (http://www.nflabs.com)
diff --git a/STYLE.md b/STYLE.md
index 8182301b5a5..b0a5f2a084a 100644
--- a/STYLE.md
+++ b/STYLE.md
@@ -7,7 +7,7 @@ app/styles/looknfeel
Overall look and theme of the Zeppelin notebook page can be customized here.
### Code Syntax Highlighting
-There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls pegdown parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js.
+There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls flexmark parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js.
#### Theme on Ace Editor
app/scripts/controllers/paragraph.js
@@ -16,7 +16,7 @@ Call setTheme on the editor with the theme path/name.
[List of themes on GitHub](https://github.com/ajaxorg/ace/tree/master/lib/ace/theme)
#### Style for Markdown Code Blocks
-Highlight.js parses and converts <pre><code> blocks from pegdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build.
+Highlight.js parses and converts <pre><code> blocks from markdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build.
Note that code block background color is overriden in app/styles/notebook.css (look for .paragraph .tableDisplay .hljs).
diff --git a/_tools/maven-4.0.0.xsd b/_tools/maven-4.0.0.xsd
deleted file mode 100644
index f3a36834a2c..00000000000
--- a/_tools/maven-4.0.0.xsd
+++ /dev/null
@@ -1,2484 +0,0 @@
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- The <code><project></code> element is the root of the descriptor.
- The following table lists all of the possible child elements.
-
-
-
-
-
-
- 3.0.0+
-
-
- The <code><project></code> element is the root of the descriptor.
- The following table lists all of the possible child elements.
-
-
-
-
-
-
- 4.0.0+
- Declares to which version of project descriptor this POM conforms.
-
-
-
-
- 4.0.0+
- The location of the parent project, if one exists. Values from the parent
- project will be the default for this project if they are left unspecified. The location
- is given as a group ID, artifact ID and version.
-
-
-
-
- 3.0.0+
-
-
- A universally unique identifier for a project. It is normal to
- use a fully-qualified package name to distinguish it from other
- projects with a similar name (eg. <code>org.apache.maven</code>).
-
-
-
-
-
-
- 3.0.0+
- The identifier for this artifact that is unique within the group given by the
- group ID. An artifact is something that is either produced or used by a project.
- Examples of artifacts produced by Maven for a project include: JARs, source and binary
- distributions, and WARs.
-
-
-
-
- 4.0.0+
- The current version of the artifact produced by this project.
-
-
-
-
- 4.0.0+
-
-
- The type of artifact this project produces, for example <code>jar</code>
- <code>war</code>
- <code>ear</code>
- <code>pom</code>.
- Plugins can create their own packaging, and
- therefore their own packaging types,
- so this list does not contain all possible types.
-
-
-
-
-
-
- 3.0.0+
- The full name of the project.
-
-
-
-
- 3.0.0+
- A detailed description of the project, used by Maven whenever it needs to
- describe the project, such as on the web site. While this element can be specified as
- CDATA to enable the use of HTML tags within the description, it is discouraged to allow
- plain text representation. If you need to modify the index page of the generated web
- site, you are able to specify your own instead of adjusting this text.
-
-
-
-
- 3.0.0+
-
-
- The URL to the project's homepage.
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId
-
-
-
-
-
-
- 3.0.0+
- The year of the project's inception, specified with 4 digits. This value is
- used when generating copyright notices as well as being informational.
-
-
-
-
- 3.0.0+
- This element describes various attributes of the organization to which the
- project belongs. These attributes are utilized when documentation is created (for
- copyright notices and links).
-
-
-
-
- 3.0.0+
-
-
- This element describes all of the licenses for this project.
- Each license is described by a <code>license</code> element, which
- is then described by additional elements.
- Projects should only list the license(s) that applies to the project
- and not the licenses that apply to dependencies.
- If multiple licenses are listed, it is assumed that the user can select
- any of them, not that they must accept all.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
- Describes the committers of a project.
-
-
-
-
-
-
-
-
-
- 3.0.0+
- Describes the contributors to a project that are not yet committers.
-
-
-
-
-
-
-
-
-
- 3.0.0+
- Contains information about a project's mailing lists.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Describes the prerequisites in the build environment for this project.
-
-
-
-
- 4.0.0+
- The modules (sometimes called subprojects) to build as a part of this
- project. Each module listed is a relative path to the directory containing the module.
- To be consistent with the way default urls are calculated from parent, it is recommended
- to have module names match artifact ids.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Specification for the SCM used by the project, such as CVS, Subversion, etc.
-
-
-
-
- 4.0.0+
- The project's issue management system information.
-
-
-
-
- 4.0.0+
- The project's continuous integration information.
-
-
-
-
- 4.0.0+
- Distribution information for a project that enables deployment of the site
- and artifacts to remote web servers and repositories respectively.
-
-
-
-
- 4.0.0+
-
-
- Properties that can be used throughout the POM as a substitution, and
- are used as filters in resources if enabled.
- The format is <code><name>value</name></code>.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Default dependency information for projects that inherit from this one. The
- dependencies in this section are not immediately resolved. Instead, when a POM derived
- from this one declares a dependency described by a matching groupId and artifactId, the
- version and other values from this section are used for that dependency if they were not
- already specified.
-
-
-
-
- 3.0.0+
-
-
- This element describes all of the dependencies associated with a
- project.
- These dependencies are used to construct a classpath for your
- project during the build process. They are automatically downloaded from the
- repositories defined in this project.
- See <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the
- dependency mechanism</a> for more information.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- The lists of the remote repositories for discovering dependencies and
- extensions.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- The lists of the remote repositories for discovering plugins for builds and
- reports.
-
-
-
-
-
-
-
-
-
- 3.0.0+
- Information required to build the project.
-
-
-
-
- 4.0.0+
-
-
- <b>Deprecated</b>. Now ignored by Maven.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- This element includes the specification of report plugins to use
- to generate the reports on the Maven-generated site.
- These reports will be run when a user executes <code>mvn site</code>.
- All of the reports will be included in the navigation bar for browsing.
-
-
-
-
-
-
- 4.0.0+
- A listing of project-local build profiles which will modify the build process
- when activated.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><parent></code> element contains information required to locate the parent project from which
- this project will inherit from.
- <strong>Note:</strong> The children of this element are not interpolated and must be given as literal values.
-
-
-
-
-
-
- 4.0.0+
- The group id of the parent project to inherit from.
-
-
-
-
- 4.0.0+
- The artifact id of the parent project to inherit from.
-
-
-
-
- 4.0.0+
- The version of the parent project to inherit.
-
-
-
-
- 4.0.0+
-
-
- The relative path of the parent <code>pom.xml</code> file within the check out.
- If not specified, it defaults to <code>../pom.xml</code>.
- Maven looks for the parent POM first in this location on
- the filesystem, then the local repository, and lastly in the remote repo.
- <code>relativePath</code> allows you to select a different location,
- for example when your structure is flat, or deeper without an intermediate parent POM.
- However, the group ID, artifact ID and version are still required,
- and must match the file in the location given or it will revert to the repository for the POM.
- This feature is only for enhancing the development in a local checkout of that project.
- Set the value to an empty string in case you want to disable the feature and always resolve
- the parent POM from the repositories.
-
-
-
-
-
-
-
-
- 3.0.0+
- Specifies the organization that produces this project.
-
-
-
-
- 3.0.0+
- The full name of the organization.
-
-
-
-
- 3.0.0+
- The URL to the organization's home page.
-
-
-
-
-
-
- 4.0.0+
- This elements describes all that pertains to distribution for a project. It is
- primarily used for deployment of artifacts and the site produced by the build.
-
-
-
-
- 4.0.0+
- Information needed to deploy the artifacts generated by the project to a
- remote repository.
-
-
-
-
- 4.0.0+
-
-
- Where to deploy snapshots of artifacts to. If not given, it defaults to the
- <code>repository</code> element.
-
-
-
-
-
-
- 4.0.0+
- Information needed for deploying the web site of the project.
-
-
-
-
- 4.0.0+
-
-
- The URL of the project's download page. If not given users will be
- referred to the homepage given by <code>url</code>.
- This is given to assist in locating artifacts that are not in the repository due to
- licensing restrictions.
-
-
-
-
-
-
- 4.0.0+
- Relocation information of the artifact if it has been moved to a new group ID
- and/or artifact ID.
-
-
-
-
- 4.0.0+
-
-
- Gives the status of this artifact in the remote repository.
- This must not be set in your local project, as it is updated by
- tools placing it in the repository. Valid values are: <code>none</code> (default),
- <code>converted</code> (repository manager converted this from a Maven 1 POM),
- <code>partner</code>
- (directly synced from a partner Maven 2 repository), <code>deployed</code> (was deployed from a Maven 2
- instance), <code>verified</code> (has been hand verified as correct and final).
-
-
-
-
-
-
-
-
- 4.0.0+
- Describes where an artifact has moved to. If any of the values are omitted, it is
- assumed to be the same as it was before.
-
-
-
-
- 4.0.0+
- The group ID the artifact has moved to.
-
-
-
-
- 4.0.0+
- The new artifact ID of the artifact.
-
-
-
-
- 4.0.0+
- The new version of the artifact.
-
-
-
-
- 4.0.0+
- An additional message to show the user about the move, such as the reason.
-
-
-
-
-
-
- 4.0.0+
- Contains the information needed for deploying websites.
-
-
-
-
- 4.0.0+
-
-
- A unique identifier for a deployment location. This is used to match the
- site to configuration in the <code>settings.xml</code> file, for example.
-
-
-
-
-
-
- 4.0.0+
- Human readable name of the deployment location.
-
-
-
-
- 4.0.0+
-
-
- The url of the location where website is deployed, in the form <code>protocol://hostname/path</code>.
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId
-
-
-
-
-
-
-
-
- 4.0.0+
- Repository contains the information needed for deploying to the remote
- repository.
-
-
-
-
- 4.0.0+
- Whether to assign snapshots a unique version comprised of the timestamp and
- build number, or to use the same version each time
-
-
-
-
- 4.0.0+
- How to handle downloading of releases from this repository.
-
-
-
-
- 4.0.0+
- How to handle downloading of snapshots from this repository.
-
-
-
-
- 4.0.0+
-
-
- A unique identifier for a repository. This is used to match the repository
- to configuration in the <code>settings.xml</code> file, for example. Furthermore, the identifier is
- used during POM inheritance and profile injection to detect repositories that should be merged.
-
-
-
-
-
-
- 4.0.0+
- Human readable name of the repository.
-
-
-
-
- 4.0.0+
-
-
- The url of the repository, in the form <code>protocol://hostname/path</code>.
-
-
-
-
-
-
- 4.0.0+
-
-
- The type of layout this repository uses for locating and storing artifacts -
- can be <code>legacy</code> or <code>default</code>.
-
-
-
-
-
-
-
-
- 4.0.0+
- Download policy.
-
-
-
-
- 4.0.0+
-
-
- Whether to use this repository for downloading this type of artifact. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>true</code>.
-
-
-
-
-
-
- 4.0.0+
-
-
- The frequency for downloading updates - can be
- <code>always,</code>
- <code>daily</code>
- (default),
- <code>interval:XXX</code>
- (in minutes) or
- <code>never</code>
- (only if it doesn't exist locally).
-
-
-
-
-
-
- 4.0.0+
-
-
- What to do when verification of an artifact checksum fails. Valid values are
- <code>ignore</code>
- ,
- <code>fail</code>
- or
- <code>warn</code>
- (the default).
-
-
-
-
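A sketch of a repository declaration combining the update and checksum policies described above; the id, name and URL are placeholders:

    <repository>
      <id>example-releases</id>
      <name>Example Releases</name>
      <url>https://repo.example.com/releases</url>
      <releases>
        <updatePolicy>daily</updatePolicy>
        <checksumPolicy>warn</checksumPolicy>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>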
-
-
-
-
- 4.0.0+
- Describes the prerequisites a project can have.
-
-
-
-
- 4.0.0+
-
- For a plugin project, the minimum version of Maven required to use
- the resulting plugin.<br />
- For specifying the minimum version of Maven required to build a
- project, this element is <b>deprecated</b>. Use the Maven Enforcer
- Plugin's <a href="https://maven.apache.org/enforcer/enforcer-rules/requireMavenVersion.html"><code>requireMavenVersion</code></a>
- rule instead.
-
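A sketch of the recommended Enforcer rule mentioned above; the plugin version and the required Maven range are illustrative:

    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-enforcer-plugin</artifactId>
      <version>3.3.0</version>
      <executions>
        <execution>
          <id>enforce-maven</id>
          <goals>
            <goal>enforce</goal>
          </goals>
          <configuration>
            <rules>
              <requireMavenVersion>
                <version>[3.6.3,)</version>
              </requireMavenVersion>
            </rules>
          </configuration>
        </execution>
      </executions>
    </plugin>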
-
-
-
-
-
-
-
- 3.0.0+
- Description of a person who has contributed to the project, but who does not have
- commit privileges. Usually, these contributions come in the form of patches submitted.
-
-
-
-
- 3.0.0+
- The full name of the contributor.
-
-
-
-
- 3.0.0+
- The email address of the contributor.
-
-
-
-
- 3.0.0+
- The URL for the homepage of the contributor.
-
-
-
-
- 3.0.0+
- The organization to which the contributor belongs.
-
-
-
-
- 3.0.0+
- The URL of the organization.
-
-
-
-
- 3.0.0+
-
-
- The roles the contributor plays in the project. Each role is described by a
- <code>role</code> element, the body of which is a role name. This can also be used to
- describe the contribution.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- The timezone the contributor is in. Typically, this is a number in the range
- <a href="http://en.wikipedia.org/wiki/UTC%E2%88%9212:00">-12</a> to <a href="http://en.wikipedia.org/wiki/UTC%2B14:00">+14</a>
- or a valid time zone id like "America/Montreal" (UTC-05:00) or "Europe/Paris" (UTC+01:00).
-
-
-
-
-
-
- 3.0.0+
- Properties about the contributor, such as an instant messenger handle.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><scm></code> element contains information required for the SCM
- (Source Control Management) of the project.
-
-
-
-
-
-
- 4.0.0+
-
-
- The source control management system URL
- that describes the repository and how to connect to the
- repository. For more information, see the
- <a href="http://maven.apache.org/scm/scm-url-format.html">URL format</a>
- and <a href="http://maven.apache.org/scm/scms-overview.html">list of supported SCMs</a>.
- This connection is read-only.
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId
-
-
-
-
-
-
- 4.0.0+
-
-
- Just like <code>connection</code>, but for developers, i.e. this scm connection
- will not be read only.
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId
-
-
-
-
-
-
- 4.0.0+
- The tag of current code. By default, it's set to HEAD during development.
-
-
-
-
- 4.0.0+
-
-
- The URL to the project's browsable SCM repository, such as ViewVC or Fisheye.
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId
-
-
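For illustration, a hypothetical <scm> block following the URL format referenced above; the repository location is made up:

    <scm>
      <connection>scm:git:https://github.com/example/project.git</connection>
      <developerConnection>scm:git:git@github.com:example/project.git</developerConnection>
      <tag>HEAD</tag>
      <url>https://github.com/example/project</url>
    </scm>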
-
-
-
-
-
-
- 4.0.0+
- A repository contains the information needed for establishing connections with
- a remote repository.
-
-
-
-
- 4.0.0+
- How to handle downloading of releases from this repository.
-
-
-
-
- 4.0.0+
- How to handle downloading of snapshots from this repository.
-
-
-
-
- 4.0.0+
-
-
- A unique identifier for a repository. This is used to match the repository
- to configuration in the <code>settings.xml</code> file, for example. Furthermore, the identifier is
- used during POM inheritance and profile injection to detect repositories that should be merged.
-
-
-
-
-
-
- 4.0.0+
- Human readable name of the repository.
-
-
-
-
- 4.0.0+
-
-
- The url of the repository, in the form <code>protocol://hostname/path</code>.
-
-
-
-
-
-
- 4.0.0+
-
-
- The type of layout this repository uses for locating and storing artifacts -
- can be <code>legacy</code> or <code>default</code>.
-
-
-
-
-
-
-
-
- 4.0.0+
- Information about the issue tracking (or bug tracking) system used to manage this
- project.
-
-
-
-
- 4.0.0+
- The name of the issue management system, e.g. Bugzilla
-
-
-
-
- 4.0.0+
- URL for the issue management system used by the project.
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><CiManagement></code> element contains information required for the
- continuous integration system of the project.
-
-
-
-
-
-
- 4.0.0+
-
-
- The name of the continuous integration system, e.g. <code>continuum</code>.
-
-
-
-
-
-
- 4.0.0+
- URL for the continuous integration system used by the project if it has a web
- interface.
-
-
-
-
- 4.0.0+
- Configuration for notifying developers/users when a build is unsuccessful,
- including user information and notification mode.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Configures one method for notifying users/developers when a build breaks.
-
-
-
-
- 4.0.0+
- The mechanism used to deliver notifications.
-
-
-
-
- 4.0.0+
- Whether to send notifications on error.
-
-
-
-
- 4.0.0+
- Whether to send notifications on failure.
-
-
-
-
- 4.0.0+
- Whether to send notifications on success.
-
-
-
-
- 4.0.0+
- Whether to send notifications on warning.
-
-
-
-
- 4.0.0+
-
-
- <b>Deprecated</b>. Where to send the notification to - eg email address.
-
-
-
-
-
-
- 0.0.0+
- Extended configuration specific to this notifier goes here.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Modifications to the build process which are activated based on environmental
- parameters or command line arguments.
-
-
-
-
- 4.0.0+
- The identifier of this build profile. This is used for command line
- activation, and identifies profiles to be merged.
-
-
-
-
-
- 4.0.0+
- The conditional logic which will automatically trigger the inclusion of this
- profile.
-
-
-
-
- 4.0.0+
- Information required to build the project.
-
-
-
-
- 4.0.0+
- The modules (sometimes called subprojects) to build as a part of this
- project. Each module listed is a relative path to the directory containing the module.
- To be consistent with the way default urls are calculated from parent, it is recommended
- to have module names match artifact ids.
-
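A short sketch, with module names matching the (hypothetical) artifact ids as recommended above:

    <modules>
      <module>example-core</module>
      <module>example-web</module>
    </modules>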
-
-
-
-
-
-
-
-
- 4.0.0+
- Distribution information for a project that enables deployment of the site
- and artifacts to remote web servers and repositories respectively.
-
-
-
-
- 4.0.0+
-
-
- Properties that can be used throughout the POM as a substitution, and
- are used as filters in resources if enabled.
- The format is <code><name>value</name></code>.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Default dependency information for projects that inherit from this one. The
- dependencies in this section are not immediately resolved. Instead, when a POM derived
- from this one declares a dependency described by a matching groupId and artifactId, the
- version and other values from this section are used for that dependency if they were not
- already specified.
-
-
-
-
- 3.0.0+
-
-
- This element describes all of the dependencies associated with a
- project.
- These dependencies are used to construct a classpath for your
- project during the build process. They are automatically downloaded from the
- repositories defined in this project.
- See <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the
- dependency mechanism</a> for more information.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- The list of remote repositories for discovering dependencies and
- extensions.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- The list of remote repositories for discovering plugins for builds and
- reports.
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- <b>Deprecated</b>. Now ignored by Maven.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- This element includes the specification of report plugins to use
- to generate the reports on the Maven-generated site.
- These reports will be run when a user executes <code>mvn site</code>.
- All of the reports will be included in the navigation bar for browsing.
-
-
-
-
-
-
-
-
- 3.0.0+
- Generic information for a build.
-
-
-
-
- 3.0.0+
- The default goal (or phase in Maven 2) to execute when none is specified for
- the project. Note that in case of a multi-module build, only the default goal of the top-level
- project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3,
- multiple goals/phases can be separated by whitespace.
-
-
-
-
- 3.0.0+
-
- This element describes all of the classpath resources such as properties
- files associated with a project. These resources are often included in the final
- package.
- The default value is <code>src/main/resources</code>.
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
- This element describes all of the classpath resources such as properties
- files associated with a project's unit tests.
- The default value is <code>src/test/resources</code>.
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
- The directory where all files generated by the build are placed.
- The default value is <code>target</code>.
-
-
-
-
-
- 4.0.0+
-
-
- The filename (excluding the extension, and with no path information) that
- the produced artifact will be called.
- The default value is <code>${artifactId}-${version}</code>.
-
-
-
-
-
-
- 4.0.0+
- The list of filter properties files that are used when filtering is enabled.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Default plugin information to be made available for reference by projects
- derived from this one. This plugin configuration will not be resolved or bound to the
- lifecycle unless referenced. Any local configuration for a given plugin will override
- the plugin's entire definition here.
-
-
-
-
- 4.0.0+
- The list of plugins to use.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><plugin></code> element contains information required for a plugin.
-
-
-
-
-
-
- 4.0.0+
- The group ID of the plugin in the repository.
-
-
-
-
- 4.0.0+
- The artifact ID of the plugin in the repository.
-
-
-
-
- 4.0.0+
- The version (or valid range of versions) of the plugin to be used.
-
-
-
-
- 4.0.0+
-
-
- Whether to load Maven extensions (such as packaging and type handlers) from
- this plugin. For performance reasons, this should only be enabled when necessary. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>false</code>.
-
-
-
-
-
-
- 4.0.0+
- Multiple specifications of a set of goals to execute during the build
- lifecycle, each having (possibly) a different configuration.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Additional dependencies that this project needs to introduce to the plugin's
- classloader.
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- <b>Deprecated</b>. Unused by Maven.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- Whether any configuration should be propagated to child POMs. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>true</code>.
-
-
-
-
-
-
- 0.0.0+
-
-
- <p>The configuration as DOM object.</p>
- <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add
- <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p>
- <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code>
- or <code>combine.self</code> attributes to the children of the configuration element:</p>
- <ul>
- <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li>
- <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li>
- </ul>
- <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and
- <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a>
- for more information.</p>
-
-
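A sketch of the combine.children attribute in action, using an invented <items> configuration element; the child appends to the inherited list instead of merging with it:

    <!-- parent POM -->
    <configuration>
      <items>
        <item>one</item>
      </items>
    </configuration>

    <!-- child POM -->
    <configuration>
      <items combine.children="append">
        <item>two</item>
      </items>
    </configuration>
    <!-- effective child configuration: items "one" and "two" -->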
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- The <code><dependency></code> element contains information about a dependency
- of the project.
-
-
-
-
-
-
- 3.0.0+
-
-
- The project group that produced the dependency, e.g.
- <code>org.apache.maven</code>.
-
-
-
-
-
-
- 3.0.0+
-
-
- The unique id for an artifact produced by the project group, e.g.
- <code>maven-artifact</code>.
-
-
-
-
-
-
- 3.0.0+
-
-
- The version of the dependency, e.g. <code>3.2.1</code>. In Maven 2, this can also be
- specified as a range of versions.
-
-
-
-
-
-
- 4.0.0+
-
-
- The type of dependency. While it
- usually represents the extension on the filename of the dependency,
- that is not always the case. A type can be mapped to a different
- extension and a classifier.
- The type often corresponds to the packaging used, though this is also
- not always the case.
- Some examples are <code>jar</code>, <code>war</code>, <code>ejb-client</code>
- and <code>test-jar</code>: see <a href="../maven-core/artifact-handlers.html">default
- artifact handlers</a> for a list.
- New types can be defined by plugins that set
- <code>extensions</code> to <code>true</code>, so this is not a complete list.
-
-
-
-
-
-
- 4.0.0+
-
-
- The classifier of the dependency. It is appended to
- the filename after the version. This allows:
- <ul>
- <li>referring to an attached artifact, for example <code>sources</code> and <code>javadoc</code>:
- see <a href="../maven-core/artifact-handlers.html">default artifact handlers</a> for a list,</li>
- <li>distinguishing two artifacts
- that belong to the same POM but were built differently.
- For example, <code>jdk14</code> and <code>jdk15</code>.</li>
- </ul>
-
-
-
-
-
-
- 4.0.0+
-
-
- The scope of the dependency - <code>compile</code>, <code>runtime</code>,
- <code>test</code>, <code>system</code>, and <code>provided</code>. Used to
- calculate the various classpaths used for compilation, testing, and so on.
- It also assists in determining which artifacts to include in a distribution of
- this project. For more information, see
- <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the
- dependency mechanism</a>.
-
-
-
-
-
-
- 4.0.0+
-
-
- FOR SYSTEM SCOPE ONLY. Note that use of this property is <b>discouraged</b>
- and may be replaced in later versions. This specifies the path on the filesystem
- for this dependency.
- Requires an absolute path for the value, not relative.
- Use a property that gives the machine specific absolute path,
- e.g. <code>${java.home}</code>.
-
-
-
-
-
-
- 4.0.0+
- Lists a set of artifacts that should be excluded from this dependency's
- artifact list when it comes to calculating transitive dependencies.
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- Indicates the dependency is optional for use of this library. While the
- version of the dependency will be taken into account for dependency calculation if the
- library is used elsewhere, it will not be passed on transitively. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>false</code>.
-
-
-
-
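Tying the preceding fields together, a hypothetical dependency declaration with a classifier, scope, optional flag and an exclusion:

    <dependency>
      <groupId>com.example</groupId>
      <artifactId>example-client</artifactId>
      <version>2.0.0</version>
      <classifier>jdk15</classifier>
      <scope>runtime</scope>
      <optional>true</optional>
      <exclusions>
        <exclusion>
          <groupId>commons-logging</groupId>
          <artifactId>commons-logging</artifactId>
        </exclusion>
      </exclusions>
    </dependency>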
-
-
-
-
- 4.0.0+
-
-
- The <code><exclusion></code> element contains information required to exclude
- an artifact from the project.
-
-
-
-
-
-
- 4.0.0+
- The artifact ID of the project to exclude.
-
-
-
-
- 4.0.0+
- The group ID of the project to exclude.
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><execution></code> element contains information required for the
- execution of a plugin.
-
-
-
-
-
-
- 4.0.0+
- The identifier of this execution for labelling the goals during the build,
- and for matching executions to merge during inheritance and profile injection.
-
-
-
-
- 4.0.0+
- The build lifecycle phase to bind the goals in this execution to. If omitted,
- the goals will be bound to the default phase specified by the plugin.
-
-
-
-
- 4.0.0+
- The goals to execute with the given configuration.
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- Whether any configuration should be propagated to child POMs. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>true</code>.
-
-
-
-
-
-
- 0.0.0+
-
-
- <p>The configuration as DOM object.</p>
- <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add
- <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p>
- <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code>
- or <code>combine.self</code> attributes to the children of the configuration element:</p>
- <ul>
- <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li>
- <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li>
- </ul>
- <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and
- <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a>
- for more information.</p>
-
-
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
- This element describes all of the classpath resources associated with a project
- or unit tests.
-
-
-
-
- 3.0.0+
-
-
- Describes the resource target path. The path is relative to the target/classes
- directory (i.e. <code>${project.build.outputDirectory}</code>).
- For example, if you want that resource to appear in a specific package
- (<code>org.apache.maven.messages</code>), you must specify this
- element with this value: <code>org/apache/maven/messages</code>.
- This is not required if you simply put the resources in that directory
- structure at the source, however.
-
-
-
-
-
-
- 3.0.0+
-
-
- Whether resources are filtered to replace tokens with parameterised values or not.
- The values are taken from the <code>properties</code> element and from the
- properties in the files listed in the <code>filters</code> element. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>false</code>.
-
-
-
-
-
-
- 3.0.0+
- Describes the directory where the resources are stored. The path is relative
- to the POM.
-
-
-
-
- 3.0.0+
-
-
- A list of patterns to include, e.g. <code>**/*.xml</code>.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- A list of patterns to exclude, e.g. <code>**/*.xml</code>
-
-
-
-
-
-
-
-
-
-
-
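A sketch of a resource declaration combining the fields above; the directory, target package and patterns are illustrative:

    <resources>
      <resource>
        <directory>src/main/resources</directory>
        <targetPath>org/apache/maven/messages</targetPath>
        <filtering>true</filtering>
        <includes>
          <include>**/*.xml</include>
        </includes>
        <excludes>
          <exclude>**/*.tmp</exclude>
        </excludes>
      </resource>
    </resources>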
-
-
- 4.0.0+
- Section for management of default plugin information for use in a group of POMs.
-
-
-
-
-
- 4.0.0+
- The list of plugins to use.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Section for management of reports and their configuration.
-
-
-
-
- 4.0.0+
-
-
- If true, then the default reports are not included in the site generation.
- This includes the reports in the "Project Info" menu. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>false</code>.
-
-
-
-
-
-
- 4.0.0+
-
-
- Where to store all of the generated reports. The default is
- <code>${project.build.directory}/site</code>.
-
-
-
-
-
-
- 4.0.0+
- The reporting plugins to use and their configuration.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- The <code><plugin></code> element contains information required for a report plugin.
-
-
-
-
-
-
- 4.0.0+
- The group ID of the reporting plugin in the repository.
-
-
-
-
- 4.0.0+
- The artifact ID of the reporting plugin in the repository.
-
-
-
-
- 4.0.0+
- The version of the reporting plugin to be used.
-
-
-
-
- 4.0.0+
-
-
- Multiple specifications of a set of reports, each having (possibly) different
- configuration. This is the reporting parallel to an <code>execution</code> in the build.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- Whether any configuration should be propagated to child POMs. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>true</code>.
-
-
-
-
-
-
- 0.0.0+
-
-
- <p>The configuration as DOM object.</p>
- <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add
- <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p>
- <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code>
- or <code>combine.self</code> attributes to the children of the configuration element:</p>
- <ul>
- <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li>
- <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li>
- </ul>
- <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and
- <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a>
- for more information.</p>
-
-
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Represents a set of reports and configuration to be used to generate them.
-
-
-
-
- 0.0.0+
- The unique id for this report set, to be used during POM inheritance and profile injection
- for merging of report sets.
-
-
-
-
-
- 4.0.0+
- The list of reports from this plugin which should be generated from this set.
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
-
- Whether any configuration should be propagated to child POMs. Note: While the type
- of this field is <code>String</code> for technical reasons, the semantic type is actually
- <code>Boolean</code>. Default value is <code>true</code>.
-
-
-
-
-
-
- 0.0.0+
-
-
- <p>The configuration as DOM object.</p>
- <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add
- <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p>
- <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code>
- or <code>combine.self</code> attributes to the children of the configuration element:</p>
- <ul>
- <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li>
- <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li>
- </ul>
- <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and
- <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a>
- for more information.</p>
-
-
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- The conditions within the build runtime environment which will trigger the
- automatic inclusion of the build profile. Multiple conditions can be defined, which must
- be all satisfied to activate the profile.
-
-
-
-
-
- 4.0.0+
- If set to true, this profile will be active unless another profile in this
- pom is activated using the command line -P option or by one of that profile's
- activators.
-
-
-
-
- 4.0.0+
-
-
- Specifies that this profile will be activated when a matching JDK is detected.
- For example, <code>1.4</code> only activates on JDKs versioned 1.4,
- while <code>!1.4</code> matches any JDK that is not version 1.4. Ranges are supported too:
- <code>[1.5,)</code> activates when the JDK is 1.5 minimum.
-
-
-
-
-
-
- 4.0.0+
- Specifies that this profile will be activated when matching operating system
- attributes are detected.
-
-
-
-
- 4.0.0+
- Specifies that this profile will be activated when this system property is
- specified.
-
-
-
-
- 4.0.0+
- Specifies that this profile will be activated based on existence of a file.
-
-
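A sketch of a profile combining several of the activation conditions described above (all values are placeholders); every listed condition must hold for the profile to activate:

    <profile>
      <id>ci-jdk11</id>
      <activation>
        <jdk>[11,)</jdk>
        <os>
          <family>unix</family>
        </os>
        <property>
          <name>env.BUILD_ENV</name>
          <value>ci</value>
        </property>
        <file>
          <exists>${basedir}/ci.flag</exists>
        </file>
      </activation>
    </profile>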
-
-
-
-
- 4.0.0+
- This is the property specification used to activate a profile. If the value field
- is empty, then the existence of the named property will activate the profile, otherwise it
- does a case-sensitive match against the property value as well.
-
-
-
-
- 4.0.0+
- The name of the property to be used to activate a profile.
-
-
-
-
- 4.0.0+
- The value of the property required to activate a profile.
-
-
-
-
-
-
- 4.0.0+
- This is an activator which will detect an operating system's attributes in order
- to activate its profile.
-
-
-
-
- 4.0.0+
-
-
- The name of the operating system to be used to activate the profile. This must be an exact match
- of the <code>${os.name}</code> Java property, such as <code>Windows XP</code>.
-
-
-
-
-
-
- 4.0.0+
-
-
- The general family of the OS to be used to activate the profile, such as
- <code>windows</code> or <code>unix</code>.
-
-
-
-
-
-
- 4.0.0+
- The architecture of the operating system to be used to activate the
- profile.
-
-
-
-
- 4.0.0+
- The version of the operating system to be used to activate the
- profile.
-
-
-
-
-
-
- 4.0.0+
- This is the file specification used to activate the profile. The <code>missing</code> value
- is the location of a file that needs to exist, and if it doesn't, the profile will be
- activated. On the other hand, <code>exists</code> will test for the existence of the file and if it is
- there, the profile will be activated.<br/>
- Variable interpolation for these file specifications is limited to <code>${basedir}</code>,
- System properties and request properties.
-
-
-
-
- 4.0.0+
- The name of the file that must be missing to activate the
- profile.
-
-
-
-
- 4.0.0+
- The name of the file that must exist to activate the profile.
-
-
-
-
-
-
- 4.0.0+
- Section for management of default dependency information for use in a group of
- POMs.
-
-
-
-
- 4.0.0+
- The dependencies specified here are not used until they are referenced in a
- POM within the group. This allows the specification of a "standard" version for a
- particular dependency.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- The <code><build></code> element contains information required to build the project.
- Default values are defined in Super POM.
-
-
-
-
-
-
- 3.0.0+
-
- This element specifies a directory containing the source of the project. The
- generated build system will compile the sources from this directory when the project is
- built. The path given is relative to the project descriptor.
- The default value is <code>src/main/java</code>.
-
-
-
-
-
- 4.0.0+
-
- This element specifies a directory containing the script sources of the
- project. This directory is meant to be different from the sourceDirectory, in that its
- contents will be copied to the output directory in most cases (since scripts are
- interpreted rather than compiled).
- The default value is <code>src/main/scripts</code>.
-
-
-
-
-
- 4.0.0+
-
- This element specifies a directory containing the unit test source of the
- project. The generated build system will compile these directories when the project is
- being tested. The path given is relative to the project descriptor.
- The default value is <code>src/test/java</code>.
-
-
-
-
-
- 4.0.0+
-
- The directory where compiled application classes are placed.
- The default value is <code>target/classes</code>.
-
-
-
-
-
- 4.0.0+
-
- The directory where compiled test classes are placed.
- The default value is <code>target/test-classes</code>.
-
-
-
-
-
- 4.0.0+
- A set of build extensions to use from this project.
-
-
-
-
-
-
-
-
-
- 3.0.0+
- The default goal (or phase in Maven 2) to execute when none is specified for
- the project. Note that in case of a multi-module build, only the default goal of the top-level
- project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3,
- multiple goals/phases can be separated by whitespace.
-
-
-
-
- 3.0.0+
-
- This element describes all of the classpath resources such as properties
- files associated with a project. These resources are often included in the final
- package.
- The default value is <code>src/main/resources</code>.
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
- This element describes all of the classpath resources such as properties
- files associated with a project's unit tests.
- The default value is <code>src/test/resources</code>.
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
-
- The directory where all files generated by the build are placed.
- The default value is <code>target</code>.
-
-
-
-
-
- 4.0.0+
-
-
- The filename (excluding the extension, and with no path information) that
- the produced artifact will be called.
- The default value is <code>${artifactId}-${version}</code>.
-
-
-
-
-
-
- 4.0.0+
- The list of filter properties files that are used when filtering is enabled.
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Default plugin information to be made available for reference by projects
- derived from this one. This plugin configuration will not be resolved or bound to the
- lifecycle unless referenced. Any local configuration for a given plugin will override
- the plugin's entire definition here.
-
-
-
-
- 4.0.0+
- The list of plugins to use.
-
-
-
-
-
-
-
-
-
-
-
- 4.0.0+
- Describes a build extension to utilise.
-
-
-
-
- 4.0.0+
- The group ID of the extension's artifact.
-
-
-
-
- 4.0.0+
- The artifact ID of the extension.
-
-
-
-
- 4.0.0+
- The version of the extension.
-
-
-
-
-
-
- 3.0.0+
- Describes the licenses for this project. This is used to generate the license
- page of the project's web site, as well as being taken into consideration in other reporting
- and validation. The licenses listed for the project are those of the project itself, and not
- of dependencies.
-
-
-
-
- 3.0.0+
- The full legal name of the license.
-
-
-
-
- 3.0.0+
- The official url for the license text.
-
-
-
-
- 3.0.0+
-
-
- The primary method by which this project may be distributed.
- <dl>
- <dt>repo</dt>
- <dd>may be downloaded from the Maven repository</dd>
- <dt>manual</dt>
- <dd>user must manually download and install the dependency.</dd>
- </dl>
-
-
-
-
-
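For example, a typical license entry using the repo distribution method:

    <licenses>
      <license>
        <name>Apache License, Version 2.0</name>
        <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
        <distribution>repo</distribution>
      </license>
    </licenses>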
-
- 3.0.0+
- Addendum information pertaining to this license.
-
-
-
-
-
-
- 3.0.0+
- This element describes all of the mailing lists associated with a project. The
- auto-generated site references this information.
-
-
-
-
- 3.0.0+
-
-
- The name of the mailing list.
-
-
-
-
-
-
- 3.0.0+
-
-
- The email address or link that can be used to subscribe to
- the mailing list. If this is an email address, a
- <code>mailto:</code> link will automatically be created
- when the documentation is created.
-
-
-
-
-
-
- 3.0.0+
-
-
- The email address or link that can be used to unsubscribe from
- the mailing list. If this is an email address, a
- <code>mailto:</code> link will automatically be created
- when the documentation is created.
-
-
-
-
-
-
- 3.0.0+
-
-
- The email address or link that can be used to post to
- the mailing list. If this is an email address, a
- <code>mailto:</code> link will automatically be created
- when the documentation is created.
-
-
-
-
-
-
- 3.0.0+
- The link to a URL where you can browse the mailing list archive.
-
-
-
-
- 3.0.0+
- The link to alternate URLs where you can browse the list archive.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
- Information about one of the committers on this project.
-
-
-
-
- 3.0.0+
- The unique ID of the developer in the SCM.
-
-
-
-
- 3.0.0+
- The full name of the contributor.
-
-
-
-
- 3.0.0+
- The email address of the contributor.
-
-
-
-
- 3.0.0+
- The URL for the homepage of the contributor.
-
-
-
-
- 3.0.0+
- The organization to which the contributor belongs.
-
-
-
-
- 3.0.0+
- The URL of the organization.
-
-
-
-
- 3.0.0+
-
-
- The roles the contributor plays in the project. Each role is described by a
- <code>role</code> element, the body of which is a role name. This can also be used to
- describe the contribution.
-
-
-
-
-
-
-
-
-
-
-
- 3.0.0+
-
-
- The timezone the contributor is in. Typically, this is a number in the range
- <a href="http://en.wikipedia.org/wiki/UTC%E2%88%9212:00">-12</a> to <a href="http://en.wikipedia.org/wiki/UTC%2B14:00">+14</a>
- or a valid time zone id like "America/Montreal" (UTC-05:00) or "Europe/Paris" (UTC+01:00).
-
-
-
-
-
-
- 3.0.0+
- Properties about the contributor, such as an instant messenger handle.
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/_tools/scalastyle.xml b/_tools/scalastyle.xml
deleted file mode 100644
index f7bb0d4819c..00000000000
--- a/_tools/scalastyle.xml
+++ /dev/null
@@ -1,146 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
- Scalastyle standard configuration
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/_tools/site/css/maven-base.css b/_tools/site/css/maven-base.css
deleted file mode 100644
index 53153e9fe1a..00000000000
--- a/_tools/site/css/maven-base.css
+++ /dev/null
@@ -1,155 +0,0 @@
-body {
- margin: 0px;
- padding: 0px;
-}
-img {
- border:none;
-}
-table {
- padding:0px;
- width: 100%;
- margin-left: -2px;
- margin-right: -2px;
-}
-acronym {
- cursor: help;
- border-bottom: 1px dotted #feb;
-}
-table.bodyTable th, table.bodyTable td {
- padding: 2px 4px 2px 4px;
- vertical-align: top;
-}
-div.clear{
- clear:both;
- visibility: hidden;
-}
-div.clear hr{
- display: none;
-}
-#bannerLeft, #bannerRight {
- font-size: xx-large;
- font-weight: bold;
-}
-#bannerLeft img, #bannerRight img {
- margin: 0px;
-}
-.xleft, #bannerLeft img {
- float:left;
-}
-.xright, #bannerRight {
- float:right;
-}
-#banner {
- padding: 0px;
-}
-#banner img {
- border: none;
-}
-#breadcrumbs {
- padding: 3px 10px 3px 10px;
-}
-#leftColumn {
- width: 170px;
- float:left;
- overflow: auto;
-}
-#bodyColumn {
- margin-right: 1.5em;
- margin-left: 197px;
-}
-#legend {
- padding: 8px 0 8px 0;
-}
-#navcolumn {
- padding: 8px 4px 0 8px;
-}
-#navcolumn h5 {
- margin: 0;
- padding: 0;
- font-size: small;
-}
-#navcolumn ul {
- margin: 0;
- padding: 0;
- font-size: small;
-}
-#navcolumn li {
- list-style-type: none;
- background-image: none;
- background-repeat: no-repeat;
- background-position: 0 0.4em;
- padding-left: 16px;
- list-style-position: outside;
- line-height: 1.2em;
- font-size: smaller;
-}
-#navcolumn li.expanded {
- background-image: url(../images/expanded.gif);
-}
-#navcolumn li.collapsed {
- background-image: url(../images/collapsed.gif);
-}
-#navcolumn li.none {
- text-indent: -1em;
- margin-left: 1em;
-}
-#poweredBy {
- text-align: center;
-}
-#navcolumn img {
- margin-top: 10px;
- margin-bottom: 3px;
-}
-#poweredBy img {
- display:block;
- margin: 20px 0 20px 17px;
-}
-#search img {
- margin: 0px;
- display: block;
-}
-#search #q, #search #btnG {
- border: 1px solid #999;
- margin-bottom:10px;
-}
-#search form {
- margin: 0px;
-}
-#lastPublished {
- font-size: x-small;
-}
-.navSection {
- margin-bottom: 2px;
- padding: 8px;
-}
-.navSectionHead {
- font-weight: bold;
- font-size: x-small;
-}
-.section {
- padding: 4px;
-}
-#footer {
- padding: 3px 10px 3px 10px;
- font-size: x-small;
-}
-#breadcrumbs {
- font-size: x-small;
- margin: 0pt;
-}
-.source {
- padding: 12px;
- margin: 1em 7px 1em 7px;
-}
-.source pre {
- margin: 0px;
- padding: 0px;
-}
-#navcolumn img.imageLink, .imageLink {
- padding-left: 0px;
- padding-bottom: 0px;
- padding-top: 0px;
- padding-right: 2px;
- border: 0px;
- margin: 0px;
-}
diff --git a/_tools/site/css/maven-theme.css b/_tools/site/css/maven-theme.css
deleted file mode 100644
index c982168bf24..00000000000
--- a/_tools/site/css/maven-theme.css
+++ /dev/null
@@ -1,141 +0,0 @@
-body {
- padding: 0px 0px 10px 0px;
-}
-body, td, select, input, li{
- font-family: Verdana, Helvetica, Arial, sans-serif;
- font-size: 13px;
-}
-code{
- font-family: Courier, monospace;
- font-size: 13px;
-}
-a {
- text-decoration: none;
-}
-a:link {
- color:#36a;
-}
-a:visited {
- color:#47a;
-}
-a:active, a:hover {
- color:#69c;
-}
-#legend li.externalLink {
- background: url(../images/external.png) left top no-repeat;
- padding-left: 18px;
-}
-a.externalLink, a.externalLink:link, a.externalLink:visited, a.externalLink:active, a.externalLink:hover {
- background: url(../images/external.png) right center no-repeat;
- padding-right: 18px;
-}
-#legend li.newWindow {
- background: url(../images/newwindow.png) left top no-repeat;
- padding-left: 18px;
-}
-a.newWindow, a.newWindow:link, a.newWindow:visited, a.newWindow:active, a.newWindow:hover {
- background: url(../images/newwindow.png) right center no-repeat;
- padding-right: 18px;
-}
-h2 {
- padding: 4px 4px 4px 6px;
- border: 1px solid #999;
- color: #900;
- background-color: #ddd;
- font-weight:900;
- font-size: x-large;
-}
-h3 {
- padding: 4px 4px 4px 6px;
- border: 1px solid #aaa;
- color: #900;
- background-color: #eee;
- font-weight: normal;
- font-size: large;
-}
-h4 {
- padding: 4px 4px 4px 6px;
- border: 1px solid #bbb;
- color: #900;
- background-color: #fff;
- font-weight: normal;
- font-size: large;
-}
-h5 {
- padding: 4px 4px 4px 6px;
- color: #900;
- font-size: normal;
-}
-p {
- line-height: 1.3em;
- font-size: small;
-}
-#breadcrumbs {
- border-top: 1px solid #aaa;
- border-bottom: 1px solid #aaa;
- background-color: #ccc;
-}
-#leftColumn {
- margin: 10px 0 0 5px;
- border: 1px solid #999;
- background-color: #eee;
-}
-#navcolumn h5 {
- font-size: smaller;
- border-bottom: 1px solid #aaaaaa;
- padding-top: 2px;
- color: #000;
-}
-
-table.bodyTable th {
- color: white;
- background-color: #bbb;
- text-align: left;
- font-weight: bold;
-}
-
-table.bodyTable th, table.bodyTable td {
- font-size: 1em;
-}
-
-table.bodyTable tr.a {
- background-color: #ddd;
-}
-
-table.bodyTable tr.b {
- background-color: #eee;
-}
-
-.source {
- border: 1px solid #999;
-}
-dl {
- padding: 4px 4px 4px 6px;
- border: 1px solid #aaa;
- background-color: #ffc;
-}
-dt {
- color: #900;
-}
-#organizationLogo img, #projectLogo img, #projectLogo span{
- margin: 8px;
-}
-#banner {
- border-bottom: 1px solid #fff;
-}
-.errormark, .warningmark, .donemark, .infomark {
- background: url(../images/icon_error_sml.gif) no-repeat;
-}
-
-.warningmark {
- background-image: url(../images/icon_warning_sml.gif);
-}
-
-.donemark {
- background-image: url(../images/icon_success_sml.gif);
-}
-
-.infomark {
- background-image: url(../images/icon_info_sml.gif);
-}
-
diff --git a/_tools/site/css/print.css b/_tools/site/css/print.css
deleted file mode 100644
index f09d546c225..00000000000
--- a/_tools/site/css/print.css
+++ /dev/null
@@ -1,7 +0,0 @@
-#banner, #footer, #leftcol, #breadcrumbs, .docs #toc, .docs .courtesylinks, #leftColumn, #navColumn {
- display: none !important;
-}
-#bodyColumn, body.docs div.docs {
- margin: 0 !important;
- border: none !important
-}
diff --git a/_tools/site/css/site.css b/_tools/site/css/site.css
deleted file mode 100644
index 055e7e286ad..00000000000
--- a/_tools/site/css/site.css
+++ /dev/null
@@ -1 +0,0 @@
-/* You can override this file with your own styles */
\ No newline at end of file
diff --git a/_tools/site/images/close.gif b/_tools/site/images/close.gif
deleted file mode 100644
index 1c26bbc5264..00000000000
Binary files a/_tools/site/images/close.gif and /dev/null differ
diff --git a/_tools/site/images/collapsed.gif b/_tools/site/images/collapsed.gif
deleted file mode 100644
index 6e710840640..00000000000
Binary files a/_tools/site/images/collapsed.gif and /dev/null differ
diff --git a/_tools/site/images/expanded.gif b/_tools/site/images/expanded.gif
deleted file mode 100644
index 0fef3d89e0d..00000000000
Binary files a/_tools/site/images/expanded.gif and /dev/null differ
diff --git a/_tools/site/images/external.png b/_tools/site/images/external.png
deleted file mode 100644
index 3f999fc88b3..00000000000
Binary files a/_tools/site/images/external.png and /dev/null differ
diff --git a/_tools/site/images/icon_error_sml.gif b/_tools/site/images/icon_error_sml.gif
deleted file mode 100644
index 61132ef2b01..00000000000
Binary files a/_tools/site/images/icon_error_sml.gif and /dev/null differ
diff --git a/_tools/site/images/icon_info_sml.gif b/_tools/site/images/icon_info_sml.gif
deleted file mode 100644
index c6cb9ad7ce4..00000000000
Binary files a/_tools/site/images/icon_info_sml.gif and /dev/null differ
diff --git a/_tools/site/images/icon_success_sml.gif b/_tools/site/images/icon_success_sml.gif
deleted file mode 100644
index 52e85a430af..00000000000
Binary files a/_tools/site/images/icon_success_sml.gif and /dev/null differ
diff --git a/_tools/site/images/icon_warning_sml.gif b/_tools/site/images/icon_warning_sml.gif
deleted file mode 100644
index 873bbb52cb9..00000000000
Binary files a/_tools/site/images/icon_warning_sml.gif and /dev/null differ
diff --git a/_tools/site/images/logos/build-by-maven-black.png b/_tools/site/images/logos/build-by-maven-black.png
deleted file mode 100644
index 919fd0f66a7..00000000000
Binary files a/_tools/site/images/logos/build-by-maven-black.png and /dev/null differ
diff --git a/_tools/site/images/logos/build-by-maven-white.png b/_tools/site/images/logos/build-by-maven-white.png
deleted file mode 100644
index 7d44c9c2e57..00000000000
Binary files a/_tools/site/images/logos/build-by-maven-white.png and /dev/null differ
diff --git a/_tools/site/images/logos/maven-feather.png b/_tools/site/images/logos/maven-feather.png
deleted file mode 100644
index b5ada836e9e..00000000000
Binary files a/_tools/site/images/logos/maven-feather.png and /dev/null differ
diff --git a/_tools/site/images/newwindow.png b/_tools/site/images/newwindow.png
deleted file mode 100644
index 6287f72bd08..00000000000
Binary files a/_tools/site/images/newwindow.png and /dev/null differ
diff --git a/_tools/site/images/rss.png b/_tools/site/images/rss.png
deleted file mode 100644
index f0796ac8862..00000000000
Binary files a/_tools/site/images/rss.png and /dev/null differ
diff --git a/alluxio/pom.xml b/alluxio/pom.xml
index f9d756d3df1..87d374db3e3 100644
--- a/alluxio/pom.xml
+++ b/alluxio/pom.xml
@@ -23,7 +23,7 @@
zeppelin-interpreter-parentorg.apache.zeppelin
- 0.10.0-SNAPSHOT
+ 0.12.0-SNAPSHOT../zeppelin-interpreter-parent/pom.xml
@@ -32,72 +32,61 @@
Zeppelin: Alluxio interpreter
- 1.0.0
+ 2.9.0alluxio
- com.google.guava
- guava
- 15.0
-
-
-
- org.alluxio
- alluxio-shell
- ${alluxio.version}
+ org.alluxio
+ alluxio-shell
+ ${alluxio.version}
- org.mockito
- mockito-all
- test
-
-
-
- org.powermock
- powermock-api-mockito
- test
-
-
-
- org.powermock
- powermock-core
+ com.google.guava
+ guava
+ 31.0.1-jretest
-
- org.powermock
- powermock-module-junit4
+ com.google.protobuf
+ protobuf-java
+ 3.16.1test
- org.powermock
- powermock-reflect
+ org.mockito
+ mockito-coretestorg.alluxio
- alluxio-core-server
+ alluxio-minicluster${alluxio.version}test
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
- org.alluxio
- alluxio-minicluster
- ${alluxio.version}
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.version}test
- org.alluxio
- alluxio-underfs-local
- ${alluxio.version}
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}test
@@ -107,9 +96,6 @@
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-plugin
@@ -119,9 +105,6 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
-
- false
-
diff --git a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java
index be912ecab5e..5478926ec9d 100644
--- a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java
+++ b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java
@@ -18,6 +18,10 @@
package org.apache.zeppelin.alluxio;
+import alluxio.cli.fs.FileSystemShell;
+import alluxio.conf.Configuration;
+import alluxio.conf.AlluxioConfiguration;
+import alluxio.conf.PropertyKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -29,9 +33,8 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
+import java.util.stream.Stream;
-import alluxio.Configuration;
-import alluxio.shell.AlluxioShell;
import org.apache.zeppelin.completer.CompletionType;
import org.apache.zeppelin.interpreter.Interpreter;
@@ -44,13 +47,13 @@
* Alluxio interpreter for Zeppelin.
*/
public class AlluxioInterpreter extends Interpreter {
-
- Logger logger = LoggerFactory.getLogger(AlluxioInterpreter.class);
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(AlluxioInterpreter.class);
protected static final String ALLUXIO_MASTER_HOSTNAME = "alluxio.master.hostname";
protected static final String ALLUXIO_MASTER_PORT = "alluxio.master.port";
- private AlluxioShell fs;
+ private FileSystemShell fs;
private int totalCommands = 0;
private int completedCommands = 0;
@@ -73,24 +76,33 @@ public AlluxioInterpreter(Properties property) {
alluxioMasterPort = property.getProperty(ALLUXIO_MASTER_PORT);
}
+ private Stream<String> filteredProperties(String prefix) {
+ return properties.stringPropertyNames().stream().filter(
+ propertyKey -> propertyKey.startsWith(prefix)
+ );
+ }
+
@Override
public void open() {
- logger.info("Starting Alluxio shell to connect to " + alluxioMasterHostname +
+ LOGGER.info("Starting Alluxio shell to connect to " + alluxioMasterHostname +
" on port " + alluxioMasterPort);
+ // Forward interpreter properties whose names start with "alluxio." as JVM system properties
+ filteredProperties("alluxio.").forEach(x -> System.setProperty(x, properties.getProperty(x)));
+
+ System.setProperty(PropertyKey.USER_RPC_RETRY_MAX_DURATION.getName(), "5s");
- System.setProperty(ALLUXIO_MASTER_HOSTNAME, alluxioMasterHostname);
- System.setProperty(ALLUXIO_MASTER_PORT, alluxioMasterPort);
- fs = new AlluxioShell(new Configuration());
+ AlluxioConfiguration conf = Configuration.global();
+ // Reduce the RPC retry max duration to fall earlier for CLIs
+ fs = new FileSystemShell(conf);
}
@Override
public void close() {
- logger.info("Closing Alluxio shell");
-
+ LOGGER.info("Closing Alluxio shell");
try {
fs.close();
} catch (IOException e) {
- logger.error("Cannot close connection", e);
+ LOGGER.error("Cannot close connection", e);
}
}
diff --git a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java
index 06711de3265..00405113a28 100644
--- a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java
+++ b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java
@@ -18,14 +18,13 @@
package org.apache.zeppelin.alluxio;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import alluxio.conf.Configuration;
+import alluxio.grpc.WritePType;
+import alluxio.client.file.FileSystemTestUtils;
+import alluxio.master.LocalAlluxioCluster;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -33,17 +32,9 @@
import java.util.Properties;
import alluxio.AlluxioURI;
-import alluxio.Constants;
-import alluxio.client.FileSystemTestUtils;
-import alluxio.client.WriteType;
-import alluxio.client.file.FileInStream;
import alluxio.client.file.FileSystem;
import alluxio.client.file.URIStatus;
import alluxio.exception.AlluxioException;
-import alluxio.exception.ExceptionMessage;
-import alluxio.master.LocalAlluxioCluster;
-import alluxio.shell.command.CommandUtils;
-import alluxio.util.FormatUtils;
import alluxio.util.io.BufferUtils;
import alluxio.util.io.PathUtils;
@@ -51,37 +42,49 @@
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static alluxio.cli.fs.command.CountCommand.COUNT_FORMAT;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
public class AlluxioInterpreterTest {
private AlluxioInterpreter alluxioInterpreter;
- private static final int SIZE_BYTES = Constants.MB * 10;
private LocalAlluxioCluster mLocalAlluxioCluster = null;
private FileSystem fs = null;
- @After
- public final void after() throws Exception {
+ @AfterEach
+ final void after() throws Exception {
if (alluxioInterpreter != null) {
alluxioInterpreter.close();
}
+
mLocalAlluxioCluster.stop();
}
- @Before
- public final void before() throws Exception {
- mLocalAlluxioCluster = new LocalAlluxioCluster(SIZE_BYTES, 1000);
+ @BeforeEach
+ final void before() throws Exception {
+ mLocalAlluxioCluster = new LocalAlluxioCluster(1, false);
+ mLocalAlluxioCluster.initConfiguration("alluxio-test");
+ Configuration.global().validate();
mLocalAlluxioCluster.start();
+
fs = mLocalAlluxioCluster.getClient();
final Properties props = new Properties();
- props.put(AlluxioInterpreter.ALLUXIO_MASTER_HOSTNAME, mLocalAlluxioCluster.getMasterHostname());
- props.put(AlluxioInterpreter.ALLUXIO_MASTER_PORT, mLocalAlluxioCluster.getMasterPort() + "");
+ props.put(AlluxioInterpreter.ALLUXIO_MASTER_HOSTNAME, mLocalAlluxioCluster.getHostname());
+ props.put(AlluxioInterpreter.ALLUXIO_MASTER_PORT, mLocalAlluxioCluster.getMasterRpcPort() + "");
alluxioInterpreter = new AlluxioInterpreter(props);
alluxioInterpreter.open();
}
@Test
- public void testCompletion() {
- List expectedResultOne = Arrays.asList(
+ void testCompletion() {
+ List<InterpreterCompletion> expectedResultOne = Arrays.asList(
new InterpreterCompletion("cat", "cat", CompletionType.command.name()),
new InterpreterCompletion("chgrp", "chgrp", CompletionType.command.name()),
new InterpreterCompletion("chmod", "chmod", CompletionType.command.name()),
@@ -90,18 +93,18 @@ public void testCompletion() {
new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name()),
new InterpreterCompletion("count", "count", CompletionType.command.name()),
new InterpreterCompletion("createLineage", "createLineage", CompletionType.command.name()));
- List expectedResultTwo = Arrays.asList(
+ List<InterpreterCompletion> expectedResultTwo = Arrays.asList(
new InterpreterCompletion("copyFromLocal", "copyFromLocal",
CompletionType.command.name()),
new InterpreterCompletion("copyToLocal", "copyToLocal",
CompletionType.command.name()),
new InterpreterCompletion("count", "count", CompletionType.command.name()));
- List expectedResultThree = Arrays.asList(
+ List<InterpreterCompletion> expectedResultThree = Arrays.asList(
new InterpreterCompletion("copyFromLocal", "copyFromLocal",
CompletionType.command.name()),
new InterpreterCompletion("copyToLocal", "copyToLocal",
CompletionType.command.name()));
- List expectedResultNone = new ArrayList<>();
+ List<InterpreterCompletion> expectedResultNone = new ArrayList<>();
List resultOne = alluxioInterpreter.completion("c", 0, null);
List resultTwo = alluxioInterpreter.completion("co", 0, null);
@@ -109,255 +112,92 @@ public void testCompletion() {
List resultNotMatch = alluxioInterpreter.completion("notMatch", 0, null);
List resultAll = alluxioInterpreter.completion("", 0, null);
- Assert.assertEquals(expectedResultOne, resultOne);
- Assert.assertEquals(expectedResultTwo, resultTwo);
- Assert.assertEquals(expectedResultThree, resultThree);
- Assert.assertEquals(expectedResultNone, resultNotMatch);
+ assertEquals(expectedResultOne, resultOne);
+ assertEquals(expectedResultTwo, resultTwo);
+ assertEquals(expectedResultThree, resultThree);
+ assertEquals(expectedResultNone, resultNotMatch);
- List allCompletionList = new ArrayList<>();
+ List<String> allCompletionList = new ArrayList<>();
for (InterpreterCompletion ic : resultAll) {
allCompletionList.add(ic.getName());
}
- Assert.assertEquals(alluxioInterpreter.keywords, allCompletionList);
- }
-
- @Test
- public void catDirectoryTest() throws IOException {
- String expected = "Successfully created directory /testDir\n\n" +
- "Path /testDir must be a file\n";
-
- InterpreterResult output = alluxioInterpreter.interpret("mkdir /testDir" +
- "\ncat /testDir", null);
-
- Assert.assertEquals(Code.ERROR, output.code());
- Assert.assertEquals(expected, output.message().get(0).getData());
+ assertEquals(alluxioInterpreter.keywords, allCompletionList);
}
@Test
- public void catNotExistTest() throws IOException {
- InterpreterResult output = alluxioInterpreter.interpret("cat /testFile", null);
- Assert.assertEquals(Code.ERROR, output.code());
- }
-
- @Test
- public void catTest() throws IOException {
- FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.MUST_CACHE,
- 10, 10);
+ void catTest() throws IOException {
+ FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.MUST_CACHE, 10, 10);
InterpreterResult output = alluxioInterpreter.interpret("cat /testFile", null);
byte[] expected = BufferUtils.getIncreasingByteArray(10);
- Assert.assertEquals(Code.SUCCESS, output.code());
- Assert.assertArrayEquals(expected,
+ assertEquals(Code.SUCCESS, output.code());
+ assertArrayEquals(expected,
output.message().get(0).getData().substring(0,
output.message().get(0).getData().length() - 1).getBytes());
}
@Test
- public void copyFromLocalLargeTest() throws IOException, AlluxioException {
- File testFile = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testFile");
- testFile.createNewFile();
- FileOutputStream fos = new FileOutputStream(testFile);
- byte[] toWrite = BufferUtils.getIncreasingByteArray(SIZE_BYTES);
- fos.write(toWrite);
- fos.close();
-
- InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " +
- testFile.getAbsolutePath() + " /testFile", null);
- Assert.assertEquals(
- "Copied " + testFile.getAbsolutePath() + " to /testFile\n\n",
- output.message().get(0).getData());
-
- long fileLength = fs.getStatus(new AlluxioURI("/testFile")).getLength();
- Assert.assertEquals(SIZE_BYTES, fileLength);
-
- FileInStream fStream = fs.openFile(new AlluxioURI("/testFile"));
- byte[] read = new byte[SIZE_BYTES];
- fStream.read(read);
- Assert.assertTrue(BufferUtils.equalIncreasingByteArray(SIZE_BYTES, read));
- }
-
- @Test
- public void loadFileTest() throws IOException, AlluxioException {
- FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.CACHE_THROUGH, 10, 10);
+ void loadFileTest() throws IOException, AlluxioException {
+ FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.CACHE_THROUGH, 10, 10);
int memPercentage = fs.getStatus(new AlluxioURI("/testFile")).getInMemoryPercentage();
- Assert.assertFalse(memPercentage == 0);
+ assertNotEquals(0, memPercentage);
alluxioInterpreter.interpret("load /testFile", null);
memPercentage = fs.getStatus(new AlluxioURI("/testFile")).getInMemoryPercentage();
- Assert.assertTrue(memPercentage == 100);
+ assertEquals(100, memPercentage);
}
@Test
- public void loadDirTest() throws IOException, AlluxioException {
- FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA", WriteType.CACHE_THROUGH, 10, 10);
- FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileB", WriteType.MUST_CACHE, 10, 10);
-
- int memPercentageA = fs.getStatus(
- new AlluxioURI("/testRoot/testFileA")).getInMemoryPercentage();
- int memPercentageB = fs.getStatus(
- new AlluxioURI("/testRoot/testFileB")).getInMemoryPercentage();
- Assert.assertFalse(memPercentageA == 0);
- Assert.assertTrue(memPercentageB == 100);
-
- alluxioInterpreter.interpret("load /testRoot", null);
-
- memPercentageA = fs.getStatus(new AlluxioURI("/testRoot/testFileA")).getInMemoryPercentage();
- memPercentageB = fs.getStatus(new AlluxioURI("/testRoot/testFileB")).getInMemoryPercentage();
- Assert.assertTrue(memPercentageA == 100);
- Assert.assertTrue(memPercentageB == 100);
- }
-
- @Test
- public void copyFromLocalTest() throws IOException, AlluxioException {
- File testDir = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testDir");
- testDir.mkdir();
- File testDirInner = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testDir/testDirInner");
- testDirInner.mkdir();
- File testFile =
- generateFileContent("/testDir/testFile", BufferUtils.getIncreasingByteArray(10));
-
- generateFileContent("/testDir/testDirInner/testFile2",
- BufferUtils.getIncreasingByteArray(10, 20));
-
- InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " +
- testFile.getParent() + " /testDir", null);
- Assert.assertEquals(
- "Copied " + testFile.getParent() + " to /testDir\n\n",
- output.message().get(0).getData());
-
- long fileLength1 = fs.getStatus(new AlluxioURI("/testDir/testFile")).getLength();
- long fileLength2 = fs.getStatus(new AlluxioURI("/testDir/testDirInner/testFile2")).getLength();
- Assert.assertEquals(10, fileLength1);
- Assert.assertEquals(20, fileLength2);
-
- FileInStream fStream1 = fs.openFile(new AlluxioURI("/testDir/testFile"));
- FileInStream fStream2 = fs.openFile(new AlluxioURI("/testDir/testDirInner/testFile2"));
- byte[] read = new byte[10];
- fStream1.read(read);
- Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, read));
- read = new byte[20];
- fStream2.read(read);
- Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, 20, read));
- }
-
- @Test
- public void copyFromLocalTestWithFullURI() throws IOException, AlluxioException {
- File testFile = generateFileContent("/srcFileURI", BufferUtils.getIncreasingByteArray(10));
- String uri = "tachyon://" + mLocalAlluxioCluster.getMasterHostname() + ":"
- + mLocalAlluxioCluster.getMasterPort() + "/destFileURI";
-
- InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " +
- testFile.getPath() + " " + uri, null);
- Assert.assertEquals(
- "Copied " + testFile.getPath() + " to " + uri + "\n\n",
- output.message().get(0).getData());
-
- long fileLength = fs.getStatus(new AlluxioURI("/destFileURI")).getLength();
- Assert.assertEquals(10L, fileLength);
-
- FileInStream fStream = fs.openFile(new AlluxioURI("/destFileURI"));
- byte[] read = new byte[10];
- fStream.read(read);
- Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, read));
- }
-
- @Test
- public void copyFromLocalFileToDstPathTest() throws IOException, AlluxioException {
- String dataString = "copyFromLocalFileToDstPathTest";
- byte[] data = dataString.getBytes();
- File localDir = new File(mLocalAlluxioCluster.getAlluxioHome() + "/localDir");
- localDir.mkdir();
- File localFile = generateFileContent("/localDir/testFile", data);
-
- alluxioInterpreter.interpret("mkdir /dstDir", null);
- alluxioInterpreter.interpret("copyFromLocal " + localFile.getPath() + " /dstDir", null);
-
- FileInStream fStream = fs.openFile(new AlluxioURI("/dstDir/testFile"));
- long fileLength = fs.getStatus(new AlluxioURI("/dstDir/testFile")).getLength();
-
- byte[] read = new byte[(int) fileLength];
- fStream.read(read);
- Assert.assertEquals(new String(read), dataString);
- }
-
- @Test
- public void copyToLocalLargeTest() throws IOException {
- copyToLocalWithBytes(SIZE_BYTES);
- }
-
- @Test
- public void copyToLocalTest() throws IOException {
- copyToLocalWithBytes(10);
- }
-
- private void copyToLocalWithBytes(int bytes) throws IOException {
- FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.MUST_CACHE, 10, 10);
+ void copyToLocalTest() throws IOException {
+ FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.MUST_CACHE, 10, 10);
InterpreterResult output = alluxioInterpreter.interpret("copyToLocal /testFile " +
mLocalAlluxioCluster.getAlluxioHome() + "/testFile", null);
- Assert.assertEquals(
- "Copied /testFile to " + mLocalAlluxioCluster.getAlluxioHome() + "/testFile\n\n",
+ assertEquals(
+ "Copied /testFile to file://" + mLocalAlluxioCluster.getAlluxioHome() + "/testFile\n\n",
output.message().get(0).getData());
fileReadTest("/testFile", 10);
}
@Test
- public void countNotExistTest() throws IOException {
- InterpreterResult output = alluxioInterpreter.interpret("count /NotExistFile", null);
- Assert.assertEquals(Code.ERROR, output.code());
- Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n",
- output.message().get(0).getData());
- }
-
- @Test
- public void countTest() throws IOException {
+ void countTest() throws IOException {
FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA",
- WriteType.CACHE_THROUGH, 10, 10);
+ WritePType.MUST_CACHE, 10);
FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB",
- WriteType.CACHE_THROUGH, 20, 20);
+ WritePType.MUST_CACHE, 20);
FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileB",
- WriteType.CACHE_THROUGH, 30, 30);
+ WritePType.MUST_CACHE, 30);
InterpreterResult output = alluxioInterpreter.interpret("count /testRoot", null);
String expected = "";
- String format = "%-25s%-25s%-15s\n";
- expected += String.format(format, "File Count", "Folder Count", "Total Bytes");
- expected += String.format(format, 3, 2, 60);
+ expected += String.format(COUNT_FORMAT, "File Count", "Folder Count", "Folder Size");
+ expected += String.format(COUNT_FORMAT, 3, 1, 60);
expected += "\n";
- Assert.assertEquals(expected, output.message().get(0).getData());
- }
+ assertEquals(expected, output.message().get(0).getData());
- @Test
- public void fileinfoNotExistTest() throws IOException {
- InterpreterResult output = alluxioInterpreter.interpret("fileInfo /NotExistFile", null);
- Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n",
- output.message().get(0).getData());
- Assert.assertEquals(Code.ERROR, output.code());
+ InterpreterResult output2 = alluxioInterpreter.interpret("count -h /testRoot", null);
+ String expected2 = "";
+ expected2 += String.format(COUNT_FORMAT, "File Count", "Folder Count", "Folder Size");
+ expected2 += String.format(COUNT_FORMAT, 3, 1, "60B");
+ expected2 += "\n";
+ assertEquals(expected2, output2.message().get(0).getData());
}
@Test
- public void locationNotExistTest() throws IOException {
- InterpreterResult output = alluxioInterpreter.interpret("location /NotExistFile", null);
- Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n",
- output.message().get(0).getData());
- Assert.assertEquals(Code.ERROR, output.code());
- }
-
- @Test
- public void lsTest() throws IOException, AlluxioException {
+ void lsTest() throws IOException, AlluxioException {
URIStatus[] files = new URIStatus[3];
FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA",
- WriteType.MUST_CACHE, 10, 10);
+ WritePType.MUST_CACHE, 10, 10);
FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB",
- WriteType.MUST_CACHE, 20, 20);
+ WritePType.MUST_CACHE, 20, 20);
FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileC",
- WriteType.THROUGH, 30, 30);
+ WritePType.THROUGH, 30, 30);
files[0] = fs.getStatus(new AlluxioURI("/testRoot/testFileA"));
files[1] = fs.getStatus(new AlluxioURI("/testRoot/testDir"));
@@ -365,120 +205,20 @@ public void lsTest() throws IOException, AlluxioException {
InterpreterResult output = alluxioInterpreter.interpret("ls /testRoot", null);
- String expected = "";
- String format = "%-10s%-25s%-15s%-5s\n";
- expected += String.format(format, FormatUtils.getSizeFromBytes(10),
- CommandUtils.convertMsToDate(files[0].getCreationTimeMs()), "In Memory",
- "/testRoot/testFileA");
- expected += String.format(format, FormatUtils.getSizeFromBytes(0),
- CommandUtils.convertMsToDate(files[1].getCreationTimeMs()), "", "/testRoot/testDir");
- expected += String.format(format, FormatUtils.getSizeFromBytes(30),
- CommandUtils.convertMsToDate(files[2].getCreationTimeMs()), "Not In Memory",
- "/testRoot/testFileC");
- expected += "\n";
-
- Assert.assertEquals(Code.SUCCESS, output.code());
- Assert.assertEquals(expected, output.message().get(0).getData());
- }
-
- @Test
- public void lsRecursiveTest() throws IOException, AlluxioException {
- URIStatus[] files = new URIStatus[4];
-
- FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA",
- WriteType.MUST_CACHE, 10, 10);
- FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB",
- WriteType.MUST_CACHE, 20, 20);
- FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileC",
- WriteType.THROUGH, 30, 30);
-
- files[0] = fs.getStatus(new AlluxioURI("/testRoot/testFileA"));
- files[1] = fs.getStatus(new AlluxioURI("/testRoot/testDir"));
- files[2] = fs.getStatus(new AlluxioURI("/testRoot/testDir/testFileB"));
- files[3] = fs.getStatus(new AlluxioURI("/testRoot/testFileC"));
-
- InterpreterResult output = alluxioInterpreter.interpret("ls -R /testRoot", null);
-
- String expected = "";
- String format = "%-10s%-25s%-15s%-5s\n";
- expected +=
- String.format(format, FormatUtils.getSizeFromBytes(10),
- CommandUtils.convertMsToDate(files[0].getCreationTimeMs()), "In Memory",
- "/testRoot/testFileA");
- expected +=
- String.format(format, FormatUtils.getSizeFromBytes(0),
- CommandUtils.convertMsToDate(files[1].getCreationTimeMs()), "",
- "/testRoot/testDir");
- expected +=
- String.format(format, FormatUtils.getSizeFromBytes(20),
- CommandUtils.convertMsToDate(files[2].getCreationTimeMs()), "In Memory",
- "/testRoot/testDir/testFileB");
- expected +=
- String.format(format, FormatUtils.getSizeFromBytes(30),
- CommandUtils.convertMsToDate(files[3].getCreationTimeMs()), "Not In Memory",
- "/testRoot/testFileC");
- expected += "\n";
-
- Assert.assertEquals(expected, output.message().get(0).getData());
- }
-
- @Test
- public void mkdirComplexPathTest() throws IOException, AlluxioException {
- InterpreterResult output = alluxioInterpreter.interpret(
- "mkdir /Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File", null);
-
- boolean existsDir = fs.exists(new AlluxioURI("/Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File"));
- Assert.assertEquals(
- "Successfully created directory /Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File\n\n",
- output.message().get(0).getData());
- Assert.assertTrue(existsDir);
- }
-
- @Test
- public void mkdirExistingTest() throws IOException {
- String command = "mkdir /festFile1";
- Assert.assertEquals(Code.SUCCESS, alluxioInterpreter.interpret(command, null).code());
- Assert.assertEquals(Code.ERROR, alluxioInterpreter.interpret(command, null).code());
- }
-
- @Test
- public void mkdirInvalidPathTest() throws IOException {
- Assert.assertEquals(
- Code.ERROR,
- alluxioInterpreter.interpret("mkdir /test File Invalid Path", null).code());
+ assertEquals(Code.SUCCESS, output.code());
}
@Test
- public void mkdirShortPathTest() throws IOException, AlluxioException {
- InterpreterResult output = alluxioInterpreter.interpret("mkdir /root/testFile1", null);
- boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1"));
- Assert.assertEquals(
- "Successfully created directory /root/testFile1\n\n",
- output.message().get(0).getData());
- Assert.assertTrue(existsDir);
- }
-
- @Test
- public void mkdirTest() throws IOException, AlluxioException {
+ void mkdirTest() throws IOException, AlluxioException {
String qualifiedPath =
- "tachyon://" + mLocalAlluxioCluster.getMasterHostname() + ":"
- + mLocalAlluxioCluster.getMasterPort() + "/root/testFile1";
+ "alluxio://" + mLocalAlluxioCluster.getHostname() + ":"
+ + mLocalAlluxioCluster.getMasterRpcPort() + "/root/testFile1";
InterpreterResult output = alluxioInterpreter.interpret("mkdir " + qualifiedPath, null);
boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1"));
- Assert.assertEquals(
+ assertEquals(
"Successfully created directory " + qualifiedPath + "\n\n",
output.message().get(0).getData());
- Assert.assertTrue(existsDir);
- }
-
- private File generateFileContent(String path, byte[] toWrite)
- throws IOException {
- File testFile = new File(mLocalAlluxioCluster.getAlluxioHome() + path);
- testFile.createNewFile();
- FileOutputStream fos = new FileOutputStream(testFile);
- fos.write(toWrite);
- fos.close();
- return testFile;
+ assertTrue(existsDir);
}
private void fileReadTest(String fileName, int size) throws IOException {
@@ -487,6 +227,6 @@ private void fileReadTest(String fileName, int size) throws IOException {
byte[] read = new byte[size];
fis.read(read);
fis.close();
- Assert.assertTrue(BufferUtils.equalIncreasingByteArray(size, read));
+ assertTrue(BufferUtils.equalIncreasingByteArray(size, read));
}
}
diff --git a/angular/pom.xml b/angular/pom.xml
index 609814a9bce..aa3989e8bfc 100644
--- a/angular/pom.xml
+++ b/angular/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>zeppelin-interpreter-parent</artifactId>
     <groupId>org.apache.zeppelin</groupId>
-    <version>0.10.0-SNAPSHOT</version>
+    <version>0.12.0-SNAPSHOT</version>
     <relativePath>../zeppelin-interpreter-parent/pom.xml</relativePath>
@@ -40,9 +40,6 @@
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-plugin
@@ -52,9 +49,6 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
-
- false
-
diff --git a/beam/README.md b/beam/README.md
deleted file mode 100644
index 948c95cfc0f..00000000000
--- a/beam/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# Overview
-Beam interpreter for Apache Zeppelin
-
-# Architecture
-Current interpreter implementation supports the static repl. It compiles the code in memory, execute it and redirect the output to zeppelin.
-
-## Building the Beam Interpreter
-You have to first build the Beam interpreter by enable the **beam** profile as follows:
-
-```
-mvn clean package -Pbeam -DskipTests -Pscala-2.10
-```
-
-### Notice
-- Flink runner comes with binary compiled for scala 2.10. So, currently we support only Scala 2.10
-
-### Technical overview
-
- * Upon starting an interpreter, an instance of `JavaCompiler` is created.
-
- * When the user runs commands with beam, the `JavaParser` go through the code to get a class that contains the main method.
-
- * Then it replaces the class name with random class name to avoid overriding while compilation. it creates new out & err stream to get the data in new stream instead of the console, to redirect output to zeppelin.
-
- * If there is any error during compilation, it can catch and redirect to zeppelin.
diff --git a/beam/pom.xml b/beam/pom.xml
deleted file mode 100644
index 199d503d1db..00000000000
--- a/beam/pom.xml
+++ /dev/null
@@ -1,272 +0,0 @@
-
-
-
-
- 4.0.0
-
-
- zeppelin-interpreter-parent
- org.apache.zeppelin
- 0.10.0-SNAPSHOT
- ../zeppelin-interpreter-parent/pom.xml
-
-
- zeppelin-beam
- jar
- Zeppelin: Beam interpreter
-
-
- beam
-
- 2.3.0
- 1.6.2
- 2.0.0
- 2.10
-
-
- 4.1.42.Final
- 3.1.0
- 1.8.1
-
-
-
-
- io.netty
- netty-all
- ${netty.version}
-
-
-
- org.apache.spark
- spark-core_${beam.scala.binary.version}
- ${beam.spark.version}
-
-
- slf4j-log4j12
- org.slf4j
-
-
- netty-all
- io.netty
-
-
- akka-actor_${beam.scala.binary.version}
- org.spark-project.akka
-
-
- akka-remote_${beam.scala.binary.version}
- org.spark-project.akka
-
-
- akka-slf4j_${beam.scala.binary.version}
- org.spark-project.akka
-
-
-
-
-
- org.apache.spark
- spark-streaming_${beam.scala.binary.version}
- ${beam.spark.version}
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
- ${beam.hadoop.version}
-
-
- slf4j-log4j12
- org.slf4j
-
-
-
-
-
- org.apache.hadoop
- hadoop-common
- ${beam.hadoop.version}
-
-
- slf4j-log4j12
- org.slf4j
-
-
-
-
-
- org.apache.zeppelin
- zeppelin-java
- ${project.version}
-
-
-
- org.apache.zeppelin
- zeppelin-scio
- ${project.version}
-
-
-
- org.apache.hadoop
- hadoop-hdfs
- ${beam.hadoop.version}
-
-
-
- org.apache.hadoop
- hadoop-client
- ${beam.hadoop.version}
-
-
- slf4j-log4j12
- org.slf4j
-
-
-
-
-
- org.apache.hadoop
- hadoop-annotations
- ${beam.hadoop.version}
-
-
-
- org.apache.hadoop
- hadoop-yarn-common
- ${beam.hadoop.version}
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
- ${beam.hadoop.version}
-
-
- slf4j-log4j12
- org.slf4j
-
-
-
-
-
- com.thoughtworks.qdox
- qdox
- 2.0-M3
-
-
-
- org.apache.beam
- beam-runners-parent
- ${beam.beam.version}
- pom
-
-
-
- org.apache.beam
- beam-runners-core-java
- ${beam.beam.version}
-
-
- google-http-client-jackson2
- com.google.http-client
-
-
-
-
-
- org.apache.beam
- beam-runners-direct-java
- ${beam.beam.version}
-
-
-
- javax.servlet
- javax.servlet-api
- ${servlet.api.version}
-
-
-
- org.apache.beam
- beam-runners-google-cloud-dataflow-java
- ${beam.beam.version}
-
-
- google-http-client-jackson2
- com.google.http-client
-
-
-
-
-
- org.apache.beam
- beam-runners-spark
- ${beam.beam.version}
- jar
-
-
-
- org.apache.beam
- beam-runners-flink_${beam.scala.binary.version}
- ${beam.beam.version}
-
-
-
- ${project.groupId}
- zeppelin-interpreter-shaded
- ${project.version}
-
-
-
- org.apache.commons
- commons-exec
- ${commons.exec.version}
-
-
-
- org.apache.avro
- avro
- ${avro.version}
-
-
-
-
-
-
-
- maven-enforcer-plugin
-
-
- maven-dependency-plugin
-
-
- maven-resources-plugin
-
-
- maven-shade-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
- false
-
-
-
-
-
diff --git a/beam/src/main/resources/interpreter-setting.json b/beam/src/main/resources/interpreter-setting.json
deleted file mode 100644
index e9b4a73c2ad..00000000000
--- a/beam/src/main/resources/interpreter-setting.json
+++ /dev/null
@@ -1,37 +0,0 @@
-[
- {
- "group": "beam",
- "name": "beam",
- "className": "org.apache.zeppelin.beam.BeamInterpreter",
- "defaultInterpreter": true,
- "properties": {
- },
- "editor": {
- "editOnDblClick": false
- }
- },
- {
- "group": "beam",
- "name": "scio",
- "className": "org.apache.zeppelin.scio.ScioInterpreter",
- "properties": {
- "zeppelin.scio.argz": {
- "envName": "ZEPPELIN_SCIO_ARGZ",
- "propertyName": "zeppelin.scio.argz",
- "defaultValue": "--runner=InProcessPipelineRunner",
- "description": "Scio interpreter wide arguments",
- "type": "textarea"
- },
- "zeppelin.scio.maxResult": {
- "envName": "ZEPPELIN_SCIO_MAXRESULT",
- "propertyName": "zeppelin.scio.maxResult",
- "defaultValue": "1000",
- "description": "Max number of SCollection results to display.",
- "type": "number"
- }
- },
- "editor": {
- "language": "scala"
- }
- }
-]
diff --git a/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java b/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java
deleted file mode 100644
index d1c56ee06ec..00000000000
--- a/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.zeppelin.beam;
-
-import org.apache.zeppelin.interpreter.InterpreterContext;
-import org.apache.zeppelin.interpreter.InterpreterResult;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.Properties;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * BeamInterpreterTest
- */
-public class BeamInterpreterTest {
-
- private static BeamInterpreter beam;
- private static InterpreterContext context;
-
- @BeforeClass
- public static void setUp() {
- Properties p = new Properties();
- beam = new BeamInterpreter(p);
- beam.open();
- context = InterpreterContext.builder().build();
- }
-
- @AfterClass
- public static void tearDown() {
- beam.close();
- }
-
- @Test
- public void testStaticRepl() {
-
- StringWriter writer = new StringWriter();
- PrintWriter out = new PrintWriter(writer);
- out.println("public class HelloWorld {");
- out.println(" public static void main(String args[]) {");
- out.println(" System.out.println(\"This is in another java file\");");
- out.println(" }");
- out.println("}");
- out.close();
-
- InterpreterResult res = beam.interpret(writer.toString(), context);
-
- assertEquals(InterpreterResult.Code.SUCCESS, res.code());
- }
-
- @Test
- public void testStaticReplWithoutMain() {
-
- StringBuffer sourceCode = new StringBuffer();
- sourceCode.append("package org.mdkt;\n");
- sourceCode.append("public class HelloClass {\n");
- sourceCode.append(" public String hello() { return \"hello\"; }");
- sourceCode.append("}");
- InterpreterResult res = beam.interpret(sourceCode.toString(), context);
- assertEquals(InterpreterResult.Code.ERROR, res.code());
- }
-
- @Test
- public void testStaticReplWithSyntaxError() {
-
- StringWriter writer = new StringWriter();
- PrintWriter out = new PrintWriter(writer);
- out.println("public class HelloWorld {");
- out.println(" public static void main(String args[]) {");
- out.println(" System.out.prin(\"This is in another java file\");");
- out.println(" }");
- out.println("}");
- out.close();
- InterpreterResult res = beam.interpret(writer.toString(), context);
-
- assertEquals(InterpreterResult.Code.ERROR, res.code());
- }
-
-}
diff --git a/bigquery/README.md b/bigquery/README.md
index 0dff5feb7c8..024d81167da 100644
--- a/bigquery/README.md
+++ b/bigquery/README.md
@@ -8,7 +8,7 @@ If you like to run these tests manually, please follow the following steps:
* [Create a new project](https://support.google.com/cloud/answer/6251787?hl=en)
* [Create a Google Compute Engine instance](https://cloud.google.com/compute/docs/instances/create-start-instance)
* Copy the project ID that you created and add it to the property "projectId" in `resources/constants.json`
-* Run the command mvn -Dbigquery.text.exclude='' test -pl bigquery -am
+* Run the command ./mvnw -Dbigquery.text.exclude='' test -pl bigquery -am
# Connection
The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables.
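
As a side note on the Connection section above, the sketch below shows one way such an authorized client can be built from application default credentials with the google-api-services-bigquery library. It is an illustrative, hedged example only: the class name, method name, and application name are assumptions made here, not the interpreter's actual implementation.

```java
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;

import java.io.IOException;
import java.security.GeneralSecurityException;

/** Minimal sketch: build a BigQuery client from the compute environment's credentials. */
public class BigQueryClientSketch {
  public static Bigquery createClient(String applicationName)
      throws IOException, GeneralSecurityException {
    HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport();
    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
    // Resolved from the environment: GCE metadata server or GOOGLE_APPLICATION_CREDENTIALS.
    GoogleCredential credential = GoogleCredential.getApplicationDefault();
    if (credential.createScopedRequired()) {
      credential = credential.createScoped(BigqueryScopes.all());
    }
    return new Bigquery.Builder(transport, jsonFactory, credential)
        .setApplicationName(applicationName)
        .build();
  }
}
```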
diff --git a/bigquery/pom.xml b/bigquery/pom.xml
index b689d926e5d..a101374d877 100644
--- a/bigquery/pom.xml
+++ b/bigquery/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>zeppelin-interpreter-parent</artifactId>
     <groupId>org.apache.zeppelin</groupId>
-    <version>0.10.0-SNAPSHOT</version>
+    <version>0.12.0-SNAPSHOT</version>
     <relativePath>../zeppelin-interpreter-parent/pom.xml</relativePath>
@@ -39,7 +39,6 @@
v2-rev20190917-1.30.3
- 2.8.624.1.1-jrebigquery
@@ -77,6 +76,10 @@
guava${guava.version}
+
+ org.apache.commons
+ commons-lang3
+
@@ -85,9 +88,6 @@
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-plugin
@@ -121,9 +121,6 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
-
- false
-
diff --git a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java
index 0973fda0df2..c23bd228e71 100644
--- a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java
+++ b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java
@@ -37,6 +37,7 @@
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.base.Function;
+import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -79,7 +80,7 @@
*
*/
public class BigQueryInterpreter extends Interpreter {
- private static Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryInterpreter.class);
private static final char NEWLINE = '\n';
private static final char TAB = '\t';
private static Bigquery service = null;
@@ -90,6 +91,7 @@ public class BigQueryInterpreter extends Interpreter {
static final String WAIT_TIME = "zeppelin.bigquery.wait_time";
static final String MAX_ROWS = "zeppelin.bigquery.max_no_of_rows";
static final String SQL_DIALECT = "zeppelin.bigquery.sql_dialect";
+ static final String REGION = "zeppelin.bigquery.region";
private static String jobId = null;
private static String projectId = null;
@@ -117,9 +119,9 @@ public void open() {
try {
service = createAuthorizedClient();
exceptionOnConnect = null;
- logger.info("Opened BigQuery SQL Connection");
+ LOGGER.info("Opened BigQuery SQL Connection");
} catch (IOException e) {
- logger.error("Cannot open connection", e);
+ LOGGER.error("Cannot open connection", e);
exceptionOnConnect = e;
close();
}
@@ -227,6 +229,7 @@ private InterpreterResult executeSql(String sql) {
long wTime = Long.parseLong(getProperty(WAIT_TIME));
long maxRows = Long.parseLong(getProperty(MAX_ROWS));
String sqlDialect = getProperty(SQL_DIALECT, "").toLowerCase();
+ String region = getProperty(REGION, null);
Boolean useLegacySql;
switch (sqlDialect) {
case "standardsql":
@@ -241,9 +244,9 @@ private InterpreterResult executeSql(String sql) {
}
Iterator pages;
try {
- pages = run(sql, projId, wTime, maxRows, useLegacySql);
+ pages = run(sql, projId, wTime, maxRows, useLegacySql, region);
} catch (IOException ex) {
- logger.error(ex.getMessage());
+ LOGGER.error(ex.getMessage());
return new InterpreterResult(Code.ERROR, ex.getMessage());
}
try {
@@ -258,10 +261,11 @@ private InterpreterResult executeSql(String sql) {
//Function to run the SQL on bigQuery service
public static Iterator run(final String queryString,
- final String projId, final long wTime, final long maxRows, Boolean useLegacySql)
- throws IOException {
+ final String projId, final long wTime, final long maxRows,
+ Boolean useLegacySql, final String region)
+ throws IOException {
try {
- logger.info("Use legacy sql: {}", useLegacySql);
+ LOGGER.info("Use legacy sql: {}", useLegacySql);
QueryResponse query;
query = service
.jobs()
@@ -275,6 +279,9 @@ public static Iterator run(final String queryString,
GetQueryResults getRequest = service.jobs().getQueryResults(
projectId,
jobId);
+ if (StringUtils.isNotBlank(region)) {
+ getRequest = getRequest.setLocation(region);
+ }
return getPages(getRequest);
} catch (IOException ex) {
throw ex;
@@ -283,14 +290,14 @@ public static Iterator run(final String queryString,
@Override
public void close() {
- logger.info("Close bqsql connection!");
+ LOGGER.info("Close bqsql connection!");
service = null;
}
@Override
public InterpreterResult interpret(String sql, InterpreterContext contextInterpreter) {
- logger.info("Run SQL command '{}'", sql);
+ LOGGER.info("Run SQL command '{}'", sql);
return executeSql(sql);
}
@@ -312,19 +319,19 @@ public int getProgress(InterpreterContext context) {
@Override
public void cancel(InterpreterContext context) {
- logger.info("Trying to Cancel current query statement.");
+ LOGGER.info("Trying to Cancel current query statement.");
if (service != null && jobId != null && projectId != null) {
try {
Bigquery.Jobs.Cancel request = service.jobs().cancel(projectId, jobId);
JobCancelResponse response = request.execute();
jobId = null;
- logger.info("Query Execution cancelled");
+ LOGGER.info("Query Execution cancelled");
} catch (IOException ex) {
- logger.error("Could not cancel the SQL execution");
+ LOGGER.error("Could not cancel the SQL execution");
}
} else {
- logger.info("Query Execution was already cancelled");
+ LOGGER.info("Query Execution was already cancelled");
}
}
diff --git a/bigquery/src/main/resources/interpreter-setting.json b/bigquery/src/main/resources/interpreter-setting.json
index 8023bed1522..989cc375d96 100644
--- a/bigquery/src/main/resources/interpreter-setting.json
+++ b/bigquery/src/main/resources/interpreter-setting.json
@@ -31,6 +31,13 @@
"defaultValue": "",
"description": "BigQuery SQL dialect (standardSQL or legacySQL). If empty, query prefix like '#standardSQL' can be used.",
"type": "string"
+ },
+ "zeppelin.bigquery.region": {
+ "envName": null,
+ "propertyName": "zeppelin.bigquery.region",
+ "defaultValue": "",
+ "description": "Location of BigQuery dataset. Needed if it is a single-region dataset.",
+ "type": "string"
}
},
"editor": {
diff --git a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java
index 9dcd9f8c61d..630530aa948 100644
--- a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java
+++ b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java
@@ -16,16 +16,9 @@
package org.apache.zeppelin.bigquery;
-import static org.junit.Assert.assertEquals;
-
import com.google.gson.Gson;
-import com.google.gson.JsonIOException;
-import com.google.gson.JsonSyntaxException;
-
-import org.junit.Before;
-import org.junit.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
-import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Properties;
@@ -33,8 +26,11 @@
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
-public class BigQueryInterpreterTest {
+class BigQueryInterpreterTest {
protected static class Constants {
private String projectId;
private String oneQuery;
@@ -55,12 +51,10 @@ public String getWrong() {
protected static Constants constants = null;
- public BigQueryInterpreterTest()
- throws JsonSyntaxException, JsonIOException, FileNotFoundException {
- if (constants == null) {
- InputStream is = this.getClass().getResourceAsStream("/constants.json");
- constants = (new Gson()).fromJson(new InputStreamReader(is), Constants.class);
- }
+ @BeforeAll
+ public static void initConstants() {
+ InputStream is = ClassLoader.class.getResourceAsStream("/constants.json");
+ constants = (new Gson()).fromJson(new InputStreamReader(is), Constants.class);
}
private InterpreterGroup intpGroup;
@@ -68,7 +62,7 @@ public BigQueryInterpreterTest()
private InterpreterContext context;
- @Before
+ @BeforeEach
public void setUp() throws Exception {
Properties p = new Properties();
p.setProperty("zeppelin.bigquery.project_id", constants.getProjectId());
@@ -84,27 +78,27 @@ public void setUp() throws Exception {
}
@Test
- public void sqlSuccess() {
+ void sqlSuccess() {
InterpreterResult ret = bqInterpreter.interpret(constants.getOne(), context);
assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
- assertEquals(ret.message().get(0).getType(), InterpreterResult.Type.TABLE);
+ assertEquals(InterpreterResult.Type.TABLE, ret.message().get(0).getType());
}
@Test
- public void badSqlSyntaxFails() {
+ void badSqlSyntaxFails() {
InterpreterResult ret = bqInterpreter.interpret(constants.getWrong(), context);
assertEquals(InterpreterResult.Code.ERROR, ret.code());
}
@Test
- public void testWithQueryPrefix() {
+ void testWithQueryPrefix() {
InterpreterResult ret = bqInterpreter.interpret(
"#standardSQL\n WITH t AS (select 1) SELECT * FROM t", context);
assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
}
@Test
- public void testInterpreterOutputData() {
+ void testInterpreterOutputData() {
InterpreterResult ret = bqInterpreter.interpret("SELECT 1 AS col1, 2 AS col2", context);
String[] lines = ret.message().get(0).getData().split("\\n");
assertEquals(2, lines.length);
diff --git a/bigquery/src/main/resources/constants.json b/bigquery/src/test/resources/constants.json
similarity index 100%
rename from bigquery/src/main/resources/constants.json
rename to bigquery/src/test/resources/constants.json
diff --git a/submarine/src/test/resources/log4j.properties b/bigquery/src/test/resources/log4j.properties
similarity index 68%
rename from submarine/src/test/resources/log4j.properties
rename to bigquery/src/test/resources/log4j.properties
index 9c22fdc83bd..b724845905d 100644
--- a/submarine/src/test/resources/log4j.properties
+++ b/bigquery/src/test/resources/log4j.properties
@@ -27,21 +27,4 @@ log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c:%L - %m%n
# Root logger option
log4j.rootLogger=INFO, stdout
-
-#mute some noisy guys
-log4j.logger.org.apache.hadoop.mapred=WARN
-log4j.logger.org.apache.hadoop.hive.ql=WARN
-log4j.logger.org.apache.hadoop.hive.metastore=WARN
-log4j.logger.org.apache.haadoop.hive.service.HiveServer=WARN
-
-log4j.logger.org.quartz=WARN
-log4j.logger.DataNucleus=WARN
-log4j.logger.DataNucleus.MetaData=ERROR
-log4j.logger.DataNucleus.Datastore=ERROR
-
-# Log all JDBC parameters
-log4j.logger.org.hibernate.type=ALL
-log4j.logger.org.apache.hadoop=WARN
-
-log4j.logger.org.apache.zeppelin.interpreter=DEBUG
-log4j.logger.org.apache.zeppelin.scheduler=DEBUG
+#log4j.logger.org.apache.zeppelin.interpreter=DEBUG
diff --git a/bin/common.sh b/bin/common.sh
index fb533d902a2..56f8aa45ff3 100644
--- a/bin/common.sh
+++ b/bin/common.sh
@@ -172,6 +172,10 @@ fi
export ZEPPELIN_RUNNER
if [[ -z "$ZEPPELIN_IDENT_STRING" ]]; then
+ # if for some reason the shell doesn't have $USER defined
+ # (e.g., ssh'd in to execute a command)
+ # let's get the effective username and use that
+ USER=${USER:-$(id -nu)}
export ZEPPELIN_IDENT_STRING="${USER}"
fi
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index c75a2990eb5..aaa9b0a15a0 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -101,6 +101,9 @@ fi
. "${bin}/common.sh"
+# Escape envs
+ZEPPELIN_INTP_CLASSPATH_OVERRIDES=$(printf %q "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}")
+
check_java_version
ZEPPELIN_INTERPRETER_API_JAR=$(find "${ZEPPELIN_HOME}/interpreter" -name 'zeppelin-interpreter-shaded-*.jar')
@@ -179,29 +182,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
else
- # add Hadoop jars into classpath
- if [[ -n "${HADOOP_HOME}" ]]; then
- # Apache
- addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share"
-
- # CDH
- addJarInDirForIntp "${HADOOP_HOME}"
- addJarInDirForIntp "${HADOOP_HOME}/lib"
- fi
-
- addJarInDirForIntp "${INTERPRETER_DIR}/dep"
-
- py4j=("${ZEPPELIN_HOME}"/interpreter/spark/pyspark/py4j-*-src.zip)
- # pick the first match py4j zip - there should only be one
- PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"
-
- if [[ -z "${PYTHONPATH}" ]]; then
- export PYTHONPATH="${PYSPARKPATH}"
- else
- export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
- fi
- unset PYSPARKPATH
- export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
+ echo "No SPARK_HOME is specified"
+ exit -1
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
@@ -226,28 +208,7 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
-elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
- # autodetect HADOOP_CONF_HOME by heuristic
- if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
- if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
- export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
- elif [[ -d "/etc/hadoop/conf" ]]; then
- export HADOOP_CONF_DIR="/etc/hadoop/conf"
- fi
- fi
-
- if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
- ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
- fi
- # autodetect TEZ_CONF_DIR
- if [[ -n "${TEZ_CONF_DIR}" ]]; then
- ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
- elif [[ -d "/etc/tez/conf" ]]; then
- ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
- else
- echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
- fi
elif [[ "${INTERPRETER_ID}" == "flink" ]]; then
addEachJarInDirRecursiveForIntp "${FLINK_HOME}/lib"
@@ -300,13 +261,13 @@ if [[ -n "${SPARK_SUBMIT}" ]]; then
IFS=' ' read -r -a SPARK_SUBMIT_OPTIONS_ARRAY <<< "${SPARK_SUBMIT_OPTIONS}"
IFS='|' read -r -a ZEPPELIN_SPARK_CONF_ARRAY <<< "${ZEPPELIN_SPARK_CONF}"
if [[ "${ZEPPELIN_SPARK_YARN_CLUSTER}" == "true" ]]; then
- INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-java-options" "${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
+ INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-java-options" "${SPARK_DRIVER_EXTRAJAVAOPTIONS_CONF} ${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
else
- INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-class-path" "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}" "--driver-java-options" "${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
+ INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-class-path" "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}" "--driver-java-options" "${SPARK_DRIVER_EXTRAJAVAOPTIONS_CONF} ${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
fi
-elif [[ "${ZEPPELIN_FLINK_YARN_APPLICATION}" == "true" ]]; then
- IFS='|' read -r -a ZEPPELIN_FLINK_YARN_APPLICATION_CONF_ARRAY <<< "${ZEPPELIN_FLINK_YARN_APPLICATION_CONF}"
- INTERPRETER_RUN_COMMAND+=("${FLINK_HOME}/bin/flink" "run-application" "-c" "${ZEPPELIN_SERVER}" "-t" "yarn-application" "${ZEPPELIN_FLINK_YARN_APPLICATION_CONF_ARRAY[@]}" "${FLINK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
+elif [[ -n "${ZEPPELIN_FLINK_APPLICATION_MODE}" ]]; then
+ IFS='|' read -r -a ZEPPELIN_FLINK_APPLICATION_MODE_CONF_ARRAY <<< "${ZEPPELIN_FLINK_APPLICATION_MODE_CONF}"
+ INTERPRETER_RUN_COMMAND+=("${FLINK_HOME}/bin/flink" "run-application" "-c" "${ZEPPELIN_SERVER}" "-t" "${ZEPPELIN_FLINK_APPLICATION_MODE}" "${ZEPPELIN_FLINK_APPLICATION_MODE_CONF_ARRAY[@]}" "${FLINK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}")
else
IFS=' ' read -r -a JAVA_INTP_OPTS_ARRAY <<< "${JAVA_INTP_OPTS}"
IFS=' ' read -r -a ZEPPELIN_INTP_MEM_ARRAY <<< "${ZEPPELIN_INTP_MEM}"
diff --git a/bin/upgrade-note.sh b/bin/upgrade-note.sh
deleted file mode 100755
index e68ef784038..00000000000
--- a/bin/upgrade-note.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Convert note format from 0.9.0 before to 0.9.0 after
-#
-
-USAGE="Usage: bin/upgrade-note.sh [-d]"
-
-bin=$(dirname "${BASH_SOURCE-$0}")
-bin=$(cd "${bin}">/dev/null; pwd)
-
-. "${bin}/common.sh"
-
-JAVA_OPTS="-Dzeppelin.log.file=logs/upgrade-note.log"
-MAIN_CLASS=org.apache.zeppelin.notebook.repo.UpgradeNoteFileTool
-
-# construct classpath
-if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then
- ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
-fi
-
-if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/classes" ]]; then
- ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/classes"
-fi
-
-if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then
- ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes"
-fi
-
-addJarInDir "${ZEPPELIN_HOME}"
-addJarInDir "${ZEPPELIN_HOME}/lib"
-addJarInDir "${ZEPPELIN_HOME}/lib/interpreter"
-addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
-addJarInDir "${ZEPPELIN_HOME}/zeppelin-zengine/target/lib"
-addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib"
-
-ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH"
-
-## Add hadoop jars when env USE_HADOOP is true
-if [[ "${USE_HADOOP}" != "false" ]]; then
- if [[ -z "${HADOOP_CONF_DIR}" ]]; then
- echo "Please specify HADOOP_CONF_DIR if USE_HADOOP is true"
- else
- ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
- if ! [ -x "$(command -v hadoop)" ]; then
- echo 'hadoop command is not in PATH when HADOOP_CONF_DIR is specified.'
- else
- ZEPPELIN_CLASSPATH+=":`hadoop classpath`"
- fi
- fi
-fi
-
-exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:${ZEPPELIN_CLASSPATH} $MAIN_CLASS "$@"
diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh
index f3c31ff9168..b3090f19398 100755
--- a/bin/zeppelin-daemon.sh
+++ b/bin/zeppelin-daemon.sh
@@ -145,14 +145,20 @@ function wait_zeppelin_is_up_for_ci() {
if [[ "${CI}" == "true" ]]; then
local count=0;
while [[ "${count}" -lt 30 ]]; do
+ # check with angular webapp path
curl -v localhost:8080 2>&1 | grep '200 OK'
- if [[ $? -ne 0 ]]; then
- sleep 1
- continue
- else
+ if [[ $? -eq 0 ]]; then
break
fi
- let "count+=1"
+
+ # check with classic webapp path
+ curl -v localhost:8080/classic/ 2>&1 | grep '200 OK'
+ if [[ $? -eq 0 ]]; then
+ break
+ fi
+
+ sleep 1
+ let "count+=1"
done
fi
}
diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh
index ef85eaad03a..efd3aae8b8a 100755
--- a/bin/zeppelin.sh
+++ b/bin/zeppelin.sh
@@ -30,7 +30,7 @@ if [ -f /proc/self/cgroup ] && [ -n "$(command -v getent)" ]; then
set +e
uidentry="$(getent passwd "$myuid")"
set -e
-
+
# If there is no passwd entry for the container UID, attempt to create one
if [ -z "$uidentry" ] ; then
if [ -w /etc/passwd ] ; then
@@ -115,8 +115,18 @@ addJarInDir "${ZEPPELIN_HOME}/zeppelin-web-angular/target/lib"
ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH"
-if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
- ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
+## Add hadoop jars when env USE_HADOOP is true
+if [[ "${USE_HADOOP}" != "false" ]]; then
+ if [[ -z "${HADOOP_CONF_DIR}" ]]; then
+ echo "Please specify HADOOP_CONF_DIR if USE_HADOOP is true"
+ else
+ ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
+ if ! [ -x "$(command -v hadoop)" ]; then
+ echo 'hadoop command is not in PATH when HADOOP_CONF_DIR is specified.'
+ else
+ ZEPPELIN_CLASSPATH+=":`hadoop classpath`"
+ fi
+ fi
fi
if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
diff --git a/build-tools/pom.xml b/build-tools/pom.xml
new file mode 100644
index 00000000000..5559f2ff347
--- /dev/null
+++ b/build-tools/pom.xml
@@ -0,0 +1,27 @@
+
+
+ 4.0.0
+ Zeppelin: Tools
+ Zeppelin Tools
+ build-tools
+
+ org.apache.zeppelin
+ zeppelin
+ 0.12.0-SNAPSHOT
+
+
\ No newline at end of file
diff --git a/_tools/checkstyle.xml b/build-tools/src/main/resources/zeppelin/checkstyle.xml
similarity index 100%
rename from _tools/checkstyle.xml
rename to build-tools/src/main/resources/zeppelin/checkstyle.xml
diff --git a/cassandra/pom.xml b/cassandra/pom.xml
index d4ec018382c..7b9ea6cc7ed 100644
--- a/cassandra/pom.xml
+++ b/cassandra/pom.xml
@@ -21,7 +21,7 @@
     <artifactId>zeppelin-interpreter-parent</artifactId>
     <groupId>org.apache.zeppelin</groupId>
-    <version>0.10.0-SNAPSHOT</version>
+    <version>0.12.0-SNAPSHOT</version>
     <relativePath>../zeppelin-interpreter-parent/pom.xml</relativePath>
@@ -31,15 +31,19 @@
Zeppelin cassandra support
- 4.8.0
- 1.1.7.3
- 1.6.0
- 1.7.1
+ 4.14.1
+ 1.1.10.4
+ 1.8.0
+ 1.9.8
- 4.2.0
+ 5.12.14.3.1.0
+ ${scala.2.12.version}
+ 2.12
+ 1.1.2
+
cassandra
@@ -100,13 +104,6 @@
runtime
-
-
org.apache.commonscommons-lang3
@@ -118,6 +115,12 @@
${scalate.version}
+
+ org.scala-lang.modules
+ scala-parser-combinators_${scala.binary.version}
+ ${scala.parser.combinators}
+
+
org.scalatest
@@ -126,6 +129,13 @@
test
+
+ org.scalacheck
+ scalacheck_${scala.binary.version}
+ ${scalacheck.version}
+ test
+
+
net.java.dev.jnajna
@@ -152,12 +162,6 @@
mockito-coretest
-
-
- org.assertj
- assertj-core
- test
-
@@ -174,30 +178,50 @@
- org.scala-tools
- maven-scala-plugin
+ net.alchim31.maven
+ scala-maven-plugin
- compile
+ eclipse-add-source
- compile
+ add-source
- compile
- test-compile
+ scala-compile-first
+ process-resources
- testCompile
+ compile
- test-compile
- process-resources
+ scala-test-compile-first
+ process-test-resources
- compile
+ testCompile
+
+
+ -unchecked
+ -deprecation
+ -feature
+ -nobootcp
+
+
+ -Xms1024m
+ -Xmx1024m
+ -XX:MaxMetaspaceSize=${MaxMetaspace}
+
+
+ -source
+ ${java.version}
+ -target
+ ${java.version}
+ -Xlint:all,-serial,-path,-options
+
+
@@ -213,30 +237,9 @@
-
- org.scalatra.scalate
- maven-scalate-plugin_${scala.binary.version}
-
-
- compile
- process-classes
-
- precompile
-
-
- ${basedir}/src/main/resources/scalate
- org.fusesource.scalate.DefaultRenderContext
-
-
-
-
-
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-plugin
@@ -246,9 +249,6 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
-
- false
-
diff --git a/cassandra/src/main/resources/scalate/helpMenu.ssp b/cassandra/src/main/resources/scalate/helpMenu.ssp
index 8ed2ae2564a..80fc99413a4 100644
--- a/cassandra/src/main/resources/scalate/helpMenu.ssp
+++ b/cassandra/src/main/resources/scalate/helpMenu.ssp
@@ -219,7 +219,7 @@
udt values should be enclosed between brackets
- (see udt CQL syntax):
+ (see udt CQL syntax):
{stree_name: ‘Beverly Hills’, number: 104, zip_code: 90020, state: ‘California’, …}
@@ -834,7 +834,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;
Instead of hard-coding your CQL queries, it is possible to use
- Zeppelin dynamic form
+ Zeppelin dynamic form syntax to inject simple value or multiple choices forms.
The legacy mustache syntax ( {{ }} ) to bind input text and select form is still supported but is deprecated and will be removed in future releases.
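
As a hedged illustration of that form syntax (the keyspace, table, and form names below are invented for the example), a paragraph can embed a text form and a select form directly in the CQL text:

```java
public class DynamicFormExample {
  // Illustrative only — demo.artists, artist, and limit are made-up names.
  // ${name=default}           -> text input form with a default value
  // ${name=default,opt1|opt2} -> select form with fixed choices
  static final String PARAGRAPH_TEXT =
      "SELECT * FROM demo.artists "
          + "WHERE name = '${artist=Bogdan Raczynski}' "
          + "LIMIT ${limit=10,10|20|50};";

  public static void main(String[] args) {
    // After values are picked in the rendered forms, the statement that is
    // actually executed is plain CQL, e.g.:
    //   SELECT * FROM demo.artists WHERE name = 'Bogdan Raczynski' LIMIT 10;
    System.out.println(PARAGRAPH_TEXT);
  }
}
```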
@@ -1050,7 +1050,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;
 Asynchronous execution is only possible when it is possible to return a Future value in the InterpreterResult. It may be an interesting proposal for the Zeppelin project.
Long story short, you have 3 available bindings:
@@ -1137,7 +1137,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;
- If you encounter a bug for this interpreter, please create a JIRA ticket.
+ If you encounter a bug for this interpreter, please create a JIRA ticket.
diff --git a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
index 5529a71859c..c3007f436f8 100644
--- a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
+++ b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala
@@ -380,7 +380,7 @@ class InterpreterLogic(val session: CqlSession, val properties: Properties) {
findInAngularRepository(variable) match {
case Some(value) => statement.replaceAll(escapedExp,value.toString)
case None =>
- val value = context.getGui.input(variable, defaultVal)
+ val value = context.getGui.textbox(variable, defaultVal)
statement.replaceAll(escapedExp, value.toString)
}
diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
index 5e4c9946117..8a6cce4ee9e 100644
--- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
+++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java
@@ -19,6 +19,8 @@
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
import com.datastax.oss.driver.api.core.config.DriverExecutionProfile;
+
+import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
@@ -28,15 +30,13 @@
import org.cassandraunit.CQLDataLoader;
import org.cassandraunit.dataset.cql.ClassPathCQLDataSet;
import org.cassandraunit.utils.EmbeddedCassandraServerHelper;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
-import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Properties;
@@ -61,18 +61,20 @@
import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SOCKET_READ_TIMEOUT_MILLIS;
import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SOCKET_TCP_NO_DELAY;
import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SPECULATIVE_EXECUTION_POLICY;
-import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
-public class CassandraInterpreterTest { //extends AbstractCassandraUnit4CQLTestCase {
+public class CassandraInterpreterTest { // extends AbstractCassandraUnit4CQLTestCase {
private static final String ARTISTS_TABLE = "zeppelin.artists";
private static volatile CassandraInterpreter interpreter;
private final InterpreterContext intrContext = InterpreterContext.builder()
- .setParagraphTitle("Paragraph1")
- .build();
+ .setParagraphTitle("Paragraph1")
+ .build();
- @BeforeClass
+ @BeforeAll
public static synchronized void setUp() throws IOException, InterruptedException {
System.setProperty("cassandra.skip_wait_for_gossip_to_settle", "0");
System.setProperty("cassandra.load_ring_state", "false");
@@ -111,44 +113,43 @@ public static synchronized void setUp() throws IOException, InterruptedException
properties.setProperty(CASSANDRA_HOSTS, EmbeddedCassandraServerHelper.getHost());
properties.setProperty(CASSANDRA_PORT,
- Integer.toString(EmbeddedCassandraServerHelper.getNativeTransportPort()));
+ Integer.toString(EmbeddedCassandraServerHelper.getNativeTransportPort()));
properties.setProperty("datastax-java-driver.advanced.connection.pool.local.size", "1");
interpreter = new CassandraInterpreter(properties);
interpreter.open();
}
- @AfterClass
+ @AfterAll
public static void tearDown() {
interpreter.close();
}
@Test
- public void should_create_cluster_and_session_upon_call_to_open(){
- assertThat(interpreter.session).isNotNull();
- assertThat(interpreter.helper).isNotNull();
+ void should_create_cluster_and_session_upon_call_to_open() {
+ assertNotNull(interpreter.session);
+ assertNotNull(interpreter.helper);
}
@Test
- public void should_set_custom_option() {
- assertThat(interpreter.session).isNotNull();
+ void should_set_custom_option() {
+ assertNotNull(interpreter.session);
DriverExecutionProfile config = interpreter.session.getContext()
- .getConfig().getDefaultProfile();
- assertThat(config.getInt(DefaultDriverOption.CONNECTION_POOL_LOCAL_SIZE, 10))
- .isEqualTo(1);
+ .getConfig().getDefaultProfile();
+ assertEquals(1, config.getInt(DefaultDriverOption.CONNECTION_POOL_LOCAL_SIZE, 10));
}
@Test
- public void should_interpret_simple_select() {
- //Given
+ void should_interpret_simple_select() {
+ // Given
- //When
+ // When
final InterpreterResult actual = interpreter.interpret("SELECT * FROM " + ARTISTS_TABLE +
- " LIMIT 10;", intrContext);
+ " LIMIT 10;", intrContext);
- //Then
- assertThat(actual).isNotNull();
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("name\tborn\tcountry\tdied\tgender\t" +
+ // Then
+ assertNotNull(actual);
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("name\tborn\tcountry\tdied\tgender\t" +
"styles\ttype\n" +
"Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t" +
"[Dance, Electro]\tPerson\n" +
@@ -163,60 +164,60 @@ public void should_interpret_simple_select() {
"[Rock, Pop, Classic]\tPerson\n" +
"Los Paranoias\tnull\tUnknown\tnull\tnull\t[Unknown]\tnull\n" +
"…And You Will Know Us by the Trail of Dead\t1994-01-01\tUSA\tnull\tnull\t" +
- "[Rock, Pop, Classic]\tGroup\n");
+ "[Rock, Pop, Classic]\tGroup\n", actual.message().get(0).getData());
}
@Test
- public void should_interpret_select_statement() {
- //Given
+ void should_interpret_select_statement() {
+ // Given
- //When
+ // When
final InterpreterResult actual = interpreter.interpret("SELECT * FROM " + ARTISTS_TABLE +
- " LIMIT 2;", intrContext);
+ " LIMIT 2;", intrContext);
- //Then
- assertThat(actual).isNotNull();
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData())
- .isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" +
+ // Then
+ assertNotNull(actual);
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" +
"Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t" +
"[Dance, Electro]\tPerson\n" +
- "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n");
+ "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n",
+ actual.message().get(0).getData());
}
@Test
- public void should_interpret_select_statement_with_cql_format() {
- //When
+ void should_interpret_select_statement_with_cql_format() {
+ // When
intrContext.getLocalProperties().put("outputFormat", "cql");
final InterpreterResult actual = interpreter.interpret(
- "SELECT * FROM " + ARTISTS_TABLE + " LIMIT 2;", intrContext);
+ "SELECT * FROM " + ARTISTS_TABLE + " LIMIT 2;", intrContext);
intrContext.getLocalProperties().remove("outputFormat");
- //Then
- assertThat(actual).isNotNull();
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData())
- .isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" +
- "'Bogdan Raczynski'\t'1977-01-01'\t'Poland'\tnull\t'Male'\t" +
- "['Dance','Electro']\t'Person'\n" +
- "'Krishna Das'\t'1947-05-31'\t'USA'\tnull\t'Male'\t['Unknown']\t'Person'\n");
+ // Then
+ assertNotNull(actual);
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" +
+ "'Bogdan Raczynski'\t'1977-01-01'\t'Poland'\tnull\t'Male'\t" +
+ "['Dance','Electro']\t'Person'\n" +
+ "'Krishna Das'\t'1947-05-31'\t'USA'\tnull\t'Male'\t['Unknown']\t'Person'\n",
+ actual.message().get(0).getData());
}
@Test
- public void should_interpret_select_statement_with_formatting_options() {
- //When
+ void should_interpret_select_statement_with_formatting_options() {
+ // When
Map props = intrContext.getLocalProperties();
props.put("outputFormat", "human");
props.put("locale", "de_DE");
props.put("floatPrecision", "2");
props.put("doublePrecision", "4");
props.put("decimalPrecision", "5");
- props.put("timeFormat", "hh:mma");
+ props.put("timeFormat", "hh:mm");
props.put("timestampFormat", "MM/dd/yy HH:mm");
- props.put("dateFormat", "E, d MMM yy");
+ props.put("dateFormat", "EEEE, d MMMM yy");
props.put("timezone", "Etc/GMT+2");
String query =
- "select date,time,timestamp,dec,double,float,tuple,udt from zeppelin.test_format;";
+ "select date,time,timestamp,dec,double,float,tuple,udt from zeppelin.test_format;";
final InterpreterResult actual = interpreter.interpret(query, intrContext);
props.remove("outputFormat");
props.remove("locale");
@@ -228,624 +229,614 @@ public void should_interpret_select_statement_with_formatting_options() {
props.remove("dateFormat");
props.remove("timezone");
- //Then
- assertThat(actual).isNotNull();
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
+ // Then
+ assertNotNull(actual);
+ assertEquals(Code.SUCCESS, actual.code());
String expected = "date\ttime\ttimestamp\tdec\tdouble\tfloat\ttuple\tudt\n" +
- "Di, 29 Jan 19\t04:05AM\t06/16/20 21:59\t123562352352,12346\t10,0153\t20,03\t" +
- "(1, text, 10)\t{id: 1, t: text, lst: [1, 2, 3]}\n";
- assertThat(actual.message().get(0).getData()).isEqualTo(expected);
+ "Dienstag, 29 Januar 19\t04:05\t06/16/20 21:59\t123562352352,12346\t10,0153\t20,03\t" +
+ "(1, text, 10)\t{id: 1, t: text, lst: [1, 2, 3]}\n";
+ assertEquals(expected, actual.message().get(0).getData());
}
@Test
- public void should_interpret_multiple_statements_with_single_line_logged_batch() {
- //Given
+ void should_interpret_multiple_statements_with_single_line_logged_batch() {
+ // Given
String statements = "CREATE TABLE IF NOT EXISTS zeppelin.albums(\n" +
- " title text PRIMARY KEY,\n" +
- " artist text,\n" +
- " year int\n" +
- ");\n" +
- "BEGIN BATCH" +
- " INSERT INTO zeppelin.albums(title,artist,year) " +
- "VALUES('The Impossible Dream EP','Carter the Unstoppable Sex Machine',1992);" +
- " INSERT INTO zeppelin.albums(title,artist,year) " +
- "VALUES('The Way You Are','Tears for Fears',1983);" +
- " INSERT INTO zeppelin.albums(title,artist,year) " +
- "VALUES('Primitive','Soulfly',2003);" +
- "APPLY BATCH;\n" +
- "SELECT * FROM zeppelin.albums;";
- //When
+ " title text PRIMARY KEY,\n" +
+ " artist text,\n" +
+ " year int\n" +
+ ");\n" +
+ "BEGIN BATCH" +
+ " INSERT INTO zeppelin.albums(title,artist,year) " +
+ "VALUES('The Impossible Dream EP','Carter the Unstoppable Sex Machine',1992);" +
+ " INSERT INTO zeppelin.albums(title,artist,year) " +
+ "VALUES('The Way You Are','Tears for Fears',1983);" +
+ " INSERT INTO zeppelin.albums(title,artist,year) " +
+ "VALUES('Primitive','Soulfly',2003);" +
+ "APPLY BATCH;\n" +
+ "SELECT * FROM zeppelin.albums;";
+ // When
final InterpreterResult actual = interpreter.interpret(statements, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("title\tartist\tyear\n" +
- "The Impossible Dream EP\tCarter the Unstoppable Sex Machine\t1992\n" +
- "The Way You Are\tTears for Fears\t1983\n" +
- "Primitive\tSoulfly\t2003\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("title\tartist\tyear\n" +
+ "The Impossible Dream EP\tCarter the Unstoppable Sex Machine\t1992\n" +
+ "The Way You Are\tTears for Fears\t1983\n" +
+ "Primitive\tSoulfly\t2003\n", actual.message().get(0).getData());
}
-
+
@Test
- public void should_throw_statement_not_having_semi_colon() {
- //Given
+ void should_throw_statement_not_having_semi_colon() {
+ // Given
String statement = "SELECT * zeppelin.albums";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(statement, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData())
- .contains("Error parsing input:\n" +
- "\t'SELECT * zeppelin.albums'\n" +
- "Did you forget to add ; (semi-colon) at the end of each CQL statement ?");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertTrue(actual.message().get(0).getData().contains("Error parsing input:\n" +
+ "\t'SELECT * zeppelin.albums'\n" +
+ "Did you forget to add ; (semi-colon) at the end of each CQL statement ?"),
+ actual.message().get(0).getData());
}
@Test
- public void should_validate_statement() {
- //Given
+ void should_validate_statement() {
+ // Given
String statement = "SELECT * zeppelin.albums;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(statement, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
+ // Then
+ assertEquals(Code.ERROR, actual.code());
String s = "line 1:9 mismatched input 'zeppelin' expecting K_FROM (SELECT * [zeppelin]...)";
- assertThat(actual.message().get(0).getData())
- .contains(s);
+ assertTrue(actual.message().get(0).getData().contains(s), actual.message().get(0).getData());
}
@Test
- public void should_execute_statement_with_consistency_option() {
- //Given
+ void should_execute_statement_with_consistency_option() {
+ // Given
String statement = "@consistency=THREE\n" +
- "SELECT * FROM zeppelin.artists LIMIT 1;";
+ "SELECT * FROM zeppelin.artists LIMIT 1;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(statement, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData())
- .contains("Not enough replicas available for query at consistency THREE (3 required " +
- "but only 1 alive)");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertTrue(actual.message().get(0).getData()
+ .contains("Not enough replicas available for query at consistency THREE (3 required " +
+ "but only 1 alive)"),
+ actual.message().get(0).getData());
}
@Test
- public void should_execute_statement_with_serial_consistency_option() {
- //Given
+ void should_execute_statement_with_serial_consistency_option() {
+ // Given
String statement = "@serialConsistency=SERIAL\n" +
- "SELECT * FROM zeppelin.artists LIMIT 1;";
+ "SELECT * FROM zeppelin.artists LIMIT 1;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(statement, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
}
@Test
- public void should_execute_statement_with_timestamp_option() throws Exception {
- //Given
+ void should_execute_statement_with_timestamp_option() throws Exception {
+ // Given
String statement1 = "INSERT INTO zeppelin.ts(key,val) VALUES('k','v1');";
String statement2 = "@timestamp=15\n" +
- "INSERT INTO zeppelin.ts(key,val) VALUES('k','v2');";
+ "INSERT INTO zeppelin.ts(key,val) VALUES('k','v2');";
CqlSession session = EmbeddedCassandraServerHelper.getSession();
// Insert v1 with current timestamp
interpreter.interpret(statement1, intrContext);
System.out.println("going to read data from zeppelin.ts;");
session.execute("SELECT val FROM zeppelin.ts LIMIT 1")
- .forEach(x -> System.out.println("row " + x ));
+ .forEach(x -> System.out.println("row " + x));
Thread.sleep(1);
- //When
+ // When
// Insert v2 with past timestamp
interpreter.interpret(statement2, intrContext);
System.out.println("going to read data from zeppelin.ts;");
session.execute("SELECT val FROM zeppelin.ts LIMIT 1")
- .forEach(x -> System.out.println("row " + x ));
+ .forEach(x -> System.out.println("row " + x));
final String actual = session.execute("SELECT val FROM zeppelin.ts LIMIT 1").one()
- .getString("val");
+ .getString("val");
- //Then
- assertThat(actual).isEqualTo("v1");
+ // Then
+ assertEquals("v1", actual);
}
@Test
- public void should_execute_statement_with_request_timeout() {
- //Given
+ void should_execute_statement_with_request_timeout() {
+ // Given
String statement = "@requestTimeOut=10000000\n" +
- "SELECT * FROM zeppelin.artists;";
+ "SELECT * FROM zeppelin.artists;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(statement, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
}
@Test
- public void should_execute_prepared_and_bound_statements() {
- //Given
+ void should_execute_prepared_and_bound_statements() {
+ // Given
String queries = "@prepare[ps]=INSERT INTO zeppelin.prepared(key,val) VALUES(?,?)\n" +
- "@prepare[select]=SELECT * FROM zeppelin.prepared WHERE key=:key\n" +
- "@bind[ps]='myKey','myValue'\n" +
- "@bind[select]='myKey'";
+ "@prepare[select]=SELECT * FROM zeppelin.prepared WHERE key=:key\n" +
+ "@bind[ps]='myKey','myValue'\n" +
+ "@bind[select]='myKey'";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("key\tval\n" +
- "myKey\tmyValue\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("key\tval\nmyKey\tmyValue\n", actual.message().get(0).getData());
}
@Test
- public void should_execute_bound_statement() {
- //Given
+ void should_execute_bound_statement() {
+ // Given
String queries = "@prepare[users_insert]=INSERT INTO zeppelin.users" +
- "(login,firstname,lastname,addresses,location)" +
- "VALUES(:login,:fn,:ln,:addresses,:loc)\n" +
- "@bind[users_insert]='jdoe','John','DOE'," +
- "{street_number: 3, street_name: 'Beverly Hills Bld', zip_code: 90209," +
- " country: 'USA', extra_info: ['Right on the hills','Next to the post box']," +
- " phone_numbers: {'home': 2016778524, 'office': 2015790847}}," +
- "('USA', 90209, 'Beverly Hills')\n" +
- "SELECT * FROM zeppelin.users WHERE login='jdoe';";
- //When
+ "(login,firstname,lastname,addresses,location)" +
+ "VALUES(:login,:fn,:ln,:addresses,:loc)\n" +
+ "@bind[users_insert]='jdoe','John','DOE'," +
+ "{street_number: 3, street_name: 'Beverly Hills Bld', zip_code: 90209," +
+ " country: 'USA', extra_info: ['Right on the hills','Next to the post box']," +
+ " phone_numbers: {'home': 2016778524, 'office': 2015790847}}," +
+ "('USA', 90209, 'Beverly Hills')\n" +
+ "SELECT * FROM zeppelin.users WHERE login='jdoe';";
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo(
- "login\taddresses\tage\tdeceased\tfirstname\tlast_update\tlastname\tlocation\n" +
- "jdoe\t" +
- "{street_number: 3, street_name: Beverly Hills Bld, zip_code: 90209, " +
- "country: USA, extra_info: [Right on the hills, Next to the post box], " +
- "phone_numbers: {home: 2016778524, office: 2015790847}}\tnull\t" +
- "null\t" +
- "John\t" +
- "null\t" +
- "DOE\t" +
- "(USA, 90209, Beverly Hills)\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("login\taddresses\tage\tdeceased\tfirstname\tlast_update\tlastname\tlocation\n" +
+ "jdoe\t" +
+ "{street_number: 3, street_name: Beverly Hills Bld, zip_code: 90209, " +
+ "country: USA, extra_info: [Right on the hills, Next to the post box], " +
+ "phone_numbers: {home: 2016778524, office: 2015790847}}\tnull\t" +
+ "null\t" +
+ "John\t" +
+ "null\t" +
+ "DOE\t" +
+ "(USA, 90209, Beverly Hills)\n", actual.message().get(0).getData());
}
@Test
- public void should_exception_when_executing_unknown_bound_statement() {
- //Given
+ void should_exception_when_executing_unknown_bound_statement() {
+ // Given
String queries = "@bind[select_users]='jdoe'";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData())
- .isEqualTo("The statement 'select_users' can not be bound to values. " +
- "Are you sure you did prepare it with @prepare[select_users] ?");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertEquals("The statement 'select_users' can not be bound to values. " +
+ "Are you sure you did prepare it with @prepare[select_users] ?",
+ actual.message().get(0).getData());
}
@Test
- public void should_extract_variable_from_statement() {
- //Given
+ void should_extract_variable_from_statement() {
+ // Given
AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null);
GUI gui = new GUI();
gui.textbox("login", "hsue");
gui.textbox("age", "27");
InterpreterContext intrContext = InterpreterContext.builder()
- .setParagraphTitle("Paragraph1")
- .setAngularObjectRegistry(angularObjectRegistry)
- .setGUI(gui)
- .build();
+ .setParagraphTitle("Paragraph1")
+ .setAngularObjectRegistry(angularObjectRegistry)
+ .setGUI(gui)
+ .build();
String queries = "@prepare[test_insert_with_variable]=" +
- "INSERT INTO zeppelin.users(login,firstname,lastname,age) VALUES(?,?,?,?)\n" +
- "@bind[test_insert_with_variable]='{{login=hsue}}','Helen','SUE',{{age=27}}\n" +
- "SELECT firstname,lastname,age FROM zeppelin.users WHERE login='hsue';";
- //When
+ "INSERT INTO zeppelin.users(login,firstname,lastname,age) VALUES(?,?,?,?)\n" +
+ "@bind[test_insert_with_variable]='{{login=hsue}}','Helen','SUE',{{age=27}}\n" +
+ "SELECT firstname,lastname,age FROM zeppelin.users WHERE login='hsue';";
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\tage\n" +
- "Helen\tSUE\t27\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("firstname\tlastname\tage\nHelen\tSUE\t27\n", actual.message().get(0).getData());
}
@Test
- public void should_just_prepare_statement() {
- //Given
+ void should_just_prepare_statement() {
+ // Given
String queries = "@prepare[just_prepare]=SELECT name,country,styles " +
- "FROM zeppelin.artists LIMIT 3";
+ "FROM zeppelin.artists LIMIT 3";
final String expected = reformatHtml(
- readTestResource("/scalate/NoResult.html"));
+ readTestResource("/scalate/NoResult.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_execute_bound_statement_with_no_bound_value() {
- //Given
+ void should_execute_bound_statement_with_no_bound_value() {
+ // Given
String queries = "@prepare[select_no_bound_value]=SELECT name,country,styles " +
- "FROM zeppelin.artists LIMIT 3\n" +
- "@bind[select_no_bound_value]";
+ "FROM zeppelin.artists LIMIT 3\n" +
+ "@bind[select_no_bound_value]";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("name\tcountry\tstyles\n" +
- "Bogdan Raczynski\tPoland\t[Dance, Electro]\n" +
- "Krishna Das\tUSA\t[Unknown]\n" +
- "Sheryl Crow\tUSA\t[Classic, Rock, Country, Blues, Pop, Folk]\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("name\tcountry\tstyles\n" +
+ "Bogdan Raczynski\tPoland\t[Dance, Electro]\n" +
+ "Krishna Das\tUSA\t[Unknown]\n" +
+ "Sheryl Crow\tUSA\t[Classic, Rock, Country, Blues, Pop, Folk]\n",
+ actual.message().get(0).getData());
}
@Test
- public void should_parse_date_value() {
- //Given
+ void should_parse_date_value() {
+ // Given
String queries = "@prepare[parse_date]=INSERT INTO zeppelin.users(login,last_update) " +
- "VALUES(?,?)\n" +
- "@bind[parse_date]='last_update','2015-07-30 12:00:01'\n" +
- "SELECT last_update FROM zeppelin.users WHERE login='last_update';";
- //When
+ "VALUES(?,?)\n" +
+ "@bind[parse_date]='last_update','2015-07-30 12:00:01'\n" +
+ "SELECT last_update FROM zeppelin.users WHERE login='last_update';";
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).contains("last_update\n2015-07-30T12:00:01.000Z");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertTrue(actual.message().get(0).getData().contains("last_update\n2015-07-30T12:00:01.000Z"),
+ actual.message().get(0).getData());
}
@Test
- public void should_bind_null_value() {
- //Given
+ void should_bind_null_value() {
+ // Given
String queries = "@prepare[bind_null]=INSERT INTO zeppelin.users(login,firstname,lastname) " +
- "VALUES(?,?,?)\n" +
- "@bind[bind_null]='bind_null',null,'NULL'\n" +
- "SELECT firstname,lastname FROM zeppelin.users WHERE login='bind_null';";
- //When
+ "VALUES(?,?,?)\n" +
+ "@bind[bind_null]='bind_null',null,'NULL'\n" +
+ "SELECT firstname,lastname FROM zeppelin.users WHERE login='bind_null';";
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\n" +
- "null\tNULL\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("firstname\tlastname\nnull\tNULL\n", actual.message().get(0).getData());
}
@Test
- public void should_bind_boolean_value() {
- //Given
+ void should_bind_boolean_value() {
+ // Given
String queries = "@prepare[bind_boolean]=INSERT INTO zeppelin.users(login,deceased) " +
- "VALUES(?,?)\n" +
- "@bind[bind_boolean]='bind_bool',false\n" +
- "SELECT login,deceased FROM zeppelin.users WHERE login='bind_bool';";
- //When
+ "VALUES(?,?)\n" +
+ "@bind[bind_boolean]='bind_bool',false\n" +
+ "SELECT login,deceased FROM zeppelin.users WHERE login='bind_bool';";
+ // When
final InterpreterResult actual = interpreter.interpret(queries, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message().get(0).getData()).isEqualTo("login\tdeceased\n" +
- "bind_bool\tfalse\n");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("login\tdeceased\nbind_bool\tfalse\n", actual.message().get(0).getData());
}
@Test
- public void should_fail_when_executing_a_removed_prepared_statement() {
- //Given
+ void should_fail_when_executing_a_removed_prepared_statement() {
+ // Given
String prepareFirst = "@prepare[to_be_removed]=INSERT INTO zeppelin.users(login,deceased) " +
- "VALUES(?,?)";
+ "VALUES(?,?)";
interpreter.interpret(prepareFirst, intrContext);
String removePrepared = "@remove_prepare[to_be_removed]\n" +
- "@bind[to_be_removed]='bind_bool'";
+ "@bind[to_be_removed]='bind_bool'";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(removePrepared, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData()).isEqualTo("The statement 'to_be_removed' can " +
- "not be bound to values. Are you sure you did prepare it with " +
- "@prepare[to_be_removed] ?");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertEquals("The statement 'to_be_removed' can " +
+ "not be bound to values. Are you sure you did prepare it with " +
+ "@prepare[to_be_removed] ?", actual.message().get(0).getData());
}
@Test
- public void should_display_statistics_for_non_select_statement() {
- //Given
+ void should_display_statistics_for_non_select_statement() {
+ // Given
String query = "USE zeppelin;\nCREATE TABLE IF NOT EXISTS no_select(id int PRIMARY KEY);";
final String rawResult = reformatHtml(readTestResource(
- "/scalate/NoResultWithExecutionInfo.html"));
+ "/scalate/NoResultWithExecutionInfo.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
final int port = EmbeddedCassandraServerHelper.getNativeTransportPort();
final String address = EmbeddedCassandraServerHelper.getHost();
- //Then
+ // Then
final String expected = rawResult.replaceAll("TRIED_HOSTS", address + ":" + port)
- .replaceAll("QUERIED_HOSTS", address + ":" + port);
+ .replaceAll("QUERIED_HOSTS", address + ":" + port);
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_error_and_display_stack_trace() {
- //Given
+ void should_error_and_display_stack_trace() {
+ // Given
String query = "@consistency=THREE\n" +
- "SELECT * FROM zeppelin.users LIMIT 3;";
- //When
+ "SELECT * FROM zeppelin.users LIMIT 3;";
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData())
- .contains("All 1 node(s) tried for the query failed");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertTrue(
+ actual.message().get(0).getData().contains("All 1 node(s) tried for the query failed"),
+ actual.message().get(0).getData());
}
@Test
- public void should_describe_cluster() {
- //Given
+ void should_describe_cluster() {
+ // Given
String query = "DESCRIBE CLUSTER;";
final String expected = reformatHtml(
- readTestResource("/scalate/DescribeCluster.html"));
+ readTestResource("/scalate/DescribeCluster.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_keyspaces() {
- //Given
+ void should_describe_keyspaces() {
+ // Given
String query = "DESCRIBE KEYSPACES;";
final String expected = reformatHtml(
- readTestResource("/scalate/DescribeKeyspaces.html"));
+ readTestResource("/scalate/DescribeKeyspaces.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_keyspace() {
- //Given
+ void should_describe_keyspace() {
+ // Given
String query = "DESCRIBE KEYSPACE live_data;";
final String expected = reformatHtml(
- readTestResource("/scalate/DescribeKeyspace_live_data.html"));
+ readTestResource("/scalate/DescribeKeyspace_live_data.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- @Ignore
- //TODO(n.a.) activate test when using Java 8 and C* 3.x
- public void should_describe_function() throws Exception {
- //Given
+ @Disabled("TODO(n.a.) activate test when using Java 8 and C* 3.x")
+ void should_describe_function() throws Exception {
+ // Given
Properties properties = new Properties();
properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1");
- properties.setProperty(CASSANDRA_PORT, "9042");
+ properties.setProperty(CASSANDRA_PORT, "9042");
Interpreter interpreter = new CassandraInterpreter(properties);
interpreter.open();
String createFunction = "CREATE FUNCTION zeppelin.maxof(val1 int,val2 int) " +
- "RETURNS NULL ON NULL INPUT " +
- "RETURNS int " +
- "LANGUAGE java " +
- "AS $$" +
- " return Math.max(val1, val2);\n" +
- "$$;";
+ "RETURNS NULL ON NULL INPUT " +
+ "RETURNS int " +
+ "LANGUAGE java " +
+ "AS $$" +
+ " return Math.max(val1, val2);\n" +
+ "$$;";
interpreter.interpret(createFunction, intrContext);
String query = "DESCRIBE FUNCTION zeppelin.maxOf;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(actual.message()).isEqualTo("xxxxx");
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals("xxxxx", actual.message());
}
@Test
- @Ignore
- //TODO(n.a.) activate test when using Java 8 and C* 3.x
- public void should_describe_aggregate() throws Exception {
- //Given
+ @Disabled("TODO(n.a.) activate test when using Java 8 and C* 3.x")
+ void should_describe_aggregate() throws Exception {
+ // Given
Properties properties = new Properties();
properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1");
- properties.setProperty(CASSANDRA_PORT, "9042");
+ properties.setProperty(CASSANDRA_PORT, "9042");
Interpreter interpreter = new CassandraInterpreter(properties);
interpreter.open();
final String query = "DESCRIBE AGGREGATES;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
}
@Test
- @Ignore
- //TODO(n.a.) activate test when using Java 8 and C* 3.x
- public void should_describe_materialized_view() throws Exception {
- //Given
+ @Disabled("TODO(n.a.) activate test when using Java 8 and C* 3.x")
+ void should_describe_materialized_view() throws Exception {
+ // Given
Properties properties = new Properties();
properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1");
- properties.setProperty(CASSANDRA_PORT, "9042");
+ properties.setProperty(CASSANDRA_PORT, "9042");
Interpreter interpreter = new CassandraInterpreter(properties);
interpreter.open();
final String query = "DESCRIBE MATERIALIZED VIEWS;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
}
@Test
- public void should_describe_table() {
- //Given
+ void should_describe_table() {
+ // Given
String query = "DESCRIBE TABLE live_data.complex_table;";
final String expected = reformatHtml(
- readTestResource("/scalate/DescribeTable_live_data_complex_table.html"));
+ readTestResource("/scalate/DescribeTable_live_data_complex_table.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_udt() {
- //Given
+ void should_describe_udt() {
+ // Given
String query = "DESCRIBE TYPE live_data.address;";
final String expected = reformatHtml(
- readTestResource("/scalate/DescribeType_live_data_address.html"));
+ readTestResource("/scalate/DescribeType_live_data_address.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_udt_withing_logged_in_keyspace() {
- //Given
+ void should_describe_udt_withing_logged_in_keyspace() {
+ // Given
String query = "USE live_data;\n" +
- "DESCRIBE TYPE address;";
+ "DESCRIBE TYPE address;";
final String expected = reformatHtml(readTestResource(
- "/scalate/DescribeType_live_data_address_within_current_keyspace.html"));
+ "/scalate/DescribeType_live_data_address_within_current_keyspace.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_all_tables() {
- //Given
+ void should_describe_all_tables() {
+ // Given
String query = "DESCRIBE TABLES;";
final String expected = reformatHtml(readTestResource(
- "/scalate/DescribeTables.html"));
+ "/scalate/DescribeTables.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
@Test
- public void should_describe_all_udts() {
- //Given
+ void should_describe_all_udts() {
+ // Given
String query = "DESCRIBE TYPES;";
final String expected = reformatHtml(readTestResource(
- "/scalate/DescribeTypes.html"));
+ "/scalate/DescribeTypes.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertEquals(expected, reformatHtml(actual.message().get(0).getData()));
}
-
@Test
- public void should_error_describing_non_existing_table() {
- //Given
+ void should_error_describing_non_existing_table() {
+ // Given
String query = "USE system;\n" +
- "DESCRIBE TABLE complex_table;";
+ "DESCRIBE TABLE complex_table;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData())
- .contains("Cannot find table system.complex_table");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertTrue(actual.message().get(0).getData().contains("Cannot find table system.complex_table"),
+ actual.message().get(0).getData());
}
@Test
- public void should_error_describing_non_existing_udt() {
- //Given
+ void should_error_describing_non_existing_udt() {
+ // Given
String query = "USE system;\n" +
- "DESCRIBE TYPE address;";
+ "DESCRIBE TYPE address;";
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.ERROR);
- assertThat(actual.message().get(0).getData()).contains("Cannot find type system.address");
+ // Then
+ assertEquals(Code.ERROR, actual.code());
+ assertTrue(actual.message().get(0).getData().contains("Cannot find type system.address"),
+ actual.message().get(0).getData());
}
@Test
- public void should_show_help() {
- //Given
+ void should_show_help() {
+ // Given
String query = "HELP;";
final String expected = reformatHtml(readTestResource("/scalate/Help.html"));
- //When
+ // When
final InterpreterResult actual = interpreter.interpret(query, intrContext);
- //Then
- assertThat(actual.code()).isEqualTo(Code.SUCCESS);
- assertThat(reformatHtml(actual.message().get(0).getData())).contains(expected);
+ // Then
+ assertEquals(Code.SUCCESS, actual.code());
+ assertTrue(reformatHtml(actual.message().get(0).getData()).contains(expected),
+ reformatHtml(actual.message().get(0).getData()));
}
private static String reformatHtml(String rawHtml) {
- return rawHtml
- .replaceAll("\\s*\n\\s*", "")
- .replaceAll(">\\s+<", "><")
- .replaceAll("(?s)data-target=\"#[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "")
- .replaceAll("(?s)id=\"[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "")
- .replaceAll("AND memtable_flush_period_in_ms = 0", "")
- .trim();
+ return rawHtml
+ .replaceAll("\\s*\n\\s*", "")
+ .replaceAll(">\\s+<", "><")
+ .replaceAll("(?s)data-target=\"#[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "")
+ .replaceAll("(?s)id=\"[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "")
+ .replaceAll("AND memtable_flush_period_in_ms = 0", "")
+ .trim();
}
private static String readTestResource(String testResource) {
- StringBuilder builder = new StringBuilder();
- InputStream stream = testResource.getClass().getResourceAsStream(testResource);
-
- try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
- String line;
- while ((line = br.readLine()) != null) {
- builder.append(line).append("\n");
- }
- } catch (Exception ex) {
- throw new RuntimeException(ex);
+ try {
+ return IOUtils.toString(
+ CassandraInterpreterTest.class.getResourceAsStream(testResource),
+ StandardCharsets.UTF_8);
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
}
-
- return builder.toString();
}
}
diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java
index fda915d2c47..c7f952c7d9f 100644
--- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java
+++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java
@@ -22,12 +22,17 @@
import static com.datastax.oss.driver.api.core.ConsistencyLevel.QUORUM;
import static com.datastax.oss.driver.api.core.ConsistencyLevel.SERIAL;
import static com.datastax.oss.driver.api.core.cql.BatchType.UNLOGGED;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.mockito.Mockito.eq;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.verifyNoInteractions;
import static org.mockito.Mockito.when;
import static java.util.Arrays.asList;
@@ -37,16 +42,8 @@
import com.datastax.oss.driver.api.core.cql.BatchableStatement;
import com.datastax.oss.driver.api.core.cql.SimpleStatement;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
import org.mockito.Answers;
import org.mockito.ArgumentCaptor;
-import org.mockito.Captor;
-import org.mockito.Mock;
-import org.mockito.runners.MockitoJUnitRunner;
-
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
@@ -71,320 +68,327 @@
import org.apache.zeppelin.display.ui.OptionInput.ParamOption;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
-@RunWith(MockitoJUnitRunner.class)
-public class InterpreterLogicTest {
- @Rule
- public ExpectedException expectedException = ExpectedException.none();
+class InterpreterLogicTest {
- @Mock(answer = Answers.RETURNS_DEEP_STUBS)
private InterpreterContext intrContext;
-
- @Mock
private CqlSession session;
- final InterpreterLogic helper = new InterpreterLogic(session, new Properties());
+ @BeforeEach
+ public void setup() {
+ intrContext = mock(InterpreterContext.class, Answers.RETURNS_DEEP_STUBS);
+ session = mock(CqlSession.class);
+ }
- @Captor
-  ArgumentCaptor<ParamOption[]> optionsCaptor;
+ final InterpreterLogic helper = new InterpreterLogic(session, new Properties());
@Test
- public void should_parse_input_string_block() {
- //Given
+ void should_parse_input_string_block() {
+ // Given
String input = "SELECT * FROM users LIMIT 10;";
- //When
+ // When
    final List<AnyBlock> anyBlocks = this.toJavaList(helper.parseInput(input));
- //Then
- assertThat(anyBlocks).hasSize(1);
- assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class);
+ // Then
+ assertEquals(1, anyBlocks.size());
+ assertTrue(anyBlocks.get(0) instanceof SimpleStm);
}
@Test
- public void should_parse_input_string_block_with_comment_dash() {
- //Given
+ void should_parse_input_string_block_with_comment_dash() {
+ // Given
String input = "SELECT * FROM users LIMIT 10; -- this is a comment";
- //When
+ // When
    final List<AnyBlock> anyBlocks = this.toJavaList(helper.parseInput(input));
- //Then
- assertThat(anyBlocks).hasSize(2);
- assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class);
- assertThat(anyBlocks.get(1)).isInstanceOf(TextBlockHierarchy.Comment.class);
+ // Then
+ assertEquals(2, anyBlocks.size());
+ assertTrue(anyBlocks.get(0) instanceof SimpleStm);
+ assertTrue(anyBlocks.get(1) instanceof TextBlockHierarchy.Comment);
}
@Test
- public void should_parse_input_string_block_with_comment_slash() {
- //Given
+ void should_parse_input_string_block_with_comment_slash() {
+ // Given
String input = "SELECT * FROM users LIMIT 10; // this is a comment";
- //When
+ // When
    final List<AnyBlock> anyBlocks = this.toJavaList(helper.parseInput(input));
- //Then
- assertThat(anyBlocks).hasSize(2);
- assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class);
- assertThat(anyBlocks.get(1)).isInstanceOf(TextBlockHierarchy.Comment.class);
+ // Then
+ assertEquals(2, anyBlocks.size());
+ assertTrue(anyBlocks.get(0) instanceof SimpleStm);
+ assertTrue(anyBlocks.get(1) instanceof TextBlockHierarchy.Comment);
}
@Test
- public void should_exception_while_parsing_input() {
- //Given
+ void should_exception_while_parsing_input() {
+ // Given
String input = "SELECT * FROM users LIMIT 10";
- //When
- expectedException.expect(InterpreterException.class);
- expectedException.expectMessage("Error parsing input:\n" +
- "\t'SELECT * FROM users LIMIT 10'\n" +
- "Did you forget to add ; (semi-colon) at the end of each CQL statement ?");
+ // When
+ InterpreterException ex = assertThrows(InterpreterException.class, () -> {
+ helper.parseInput(input);
+ });
+
+ assertEquals("Error parsing input:\n" +
+ "\t'SELECT * FROM users LIMIT 10'\n" +
+ "Did you forget to add ; (semi-colon) at the end of each CQL statement ?", ex.getMessage());
- helper.parseInput(input);
}
@Test
- public void should_extract_variable_and_default_value() {
- //Given
+ void should_extract_variable_and_default_value() {
+ // Given
AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null);
when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry);
- when(intrContext.getGui().input("table", "zeppelin.demo")).thenReturn("zeppelin.demo");
- when(intrContext.getGui().input("id", "'John'")).thenReturn("'John'");
+ when(intrContext.getGui().textbox("table", "zeppelin.demo")).thenReturn("zeppelin.demo");
+ when(intrContext.getGui().textbox("id", "'John'")).thenReturn("'John'");
- //When
+ // When
final String actual = helper.maybeExtractVariables(
- "SELECT * FROM {{table=zeppelin.demo}} WHERE id={{id='John'}}", intrContext);
+ "SELECT * FROM {{table=zeppelin.demo}} WHERE id={{id='John'}}", intrContext);
- //Then
- assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo WHERE id='John'");
+ // Then
+ assertEquals("SELECT * FROM zeppelin.demo WHERE id='John'", actual);
}
@Test
- public void should_extract_variable_and_choices() {
- //Given
+ void should_extract_variable_and_choices() {
+ // Given
AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null);
when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry);
- when(intrContext.getGui().select(eq("name"), optionsCaptor.capture(), eq("'Paul'")))
- .thenReturn("'Jack'");
-
- //When
+    ArgumentCaptor<ParamOption[]> optionsCaptor = ArgumentCaptor.forClass(ParamOption[].class);
+ when(intrContext.getGui().select(any(), any(), any())).thenReturn("'Jack'");
+ // When
final String actual = helper.maybeExtractVariables(
- "SELECT * FROM zeppelin.artists WHERE name={{name='Paul'|'Jack'|'Smith'}}",
- intrContext);
-
- //Then
- assertThat(actual).isEqualTo("SELECT * FROM zeppelin.artists WHERE name='Jack'");
+ "SELECT * FROM zeppelin.artists WHERE name={{name='Paul'|'Jack'|'Smith'}}",
+ intrContext);
+ verify(intrContext.getGui()).select(eq("name"), optionsCaptor.capture(), eq("'Paul'"));
+ // Then
+ assertEquals("SELECT * FROM zeppelin.artists WHERE name='Jack'", actual);
    final List<ParamOption> paramOptions = asList(optionsCaptor.getValue());
- assertThat(paramOptions.get(0).getValue()).isEqualTo("'Paul'");
- assertThat(paramOptions.get(1).getValue()).isEqualTo("'Jack'");
- assertThat(paramOptions.get(2).getValue()).isEqualTo("'Smith'");
+ assertEquals("'Paul'", paramOptions.get(0).getValue());
+ assertEquals("'Jack'", paramOptions.get(1).getValue());
+ assertEquals("'Smith'", paramOptions.get(2).getValue());
}
@Test
- public void should_extract_no_variable() {
- //Given
+ void should_extract_no_variable() {
+ // Given
GUI gui = mock(GUI.class);
when(intrContext.getGui()).thenReturn(gui);
- //When
+ // When
final String actual = helper.maybeExtractVariables("SELECT * FROM zeppelin.demo", intrContext);
- //Then
- verifyZeroInteractions(gui);
- assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo");
+ // Then
+ verifyNoInteractions(gui);
+ assertEquals("SELECT * FROM zeppelin.demo", actual);
}
@Test
- public void should_extract_variable_from_angular_object_registry() {
- //Given
+ void should_extract_variable_from_angular_object_registry() {
+ // Given
AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null);
angularObjectRegistry.add("id", "from_angular_registry", "noteId", "paragraphId");
when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry);
when(intrContext.getNoteId()).thenReturn("noteId");
when(intrContext.getParagraphId()).thenReturn("paragraphId");
- //When
+ // When
final String actual = helper.maybeExtractVariables(
- "SELECT * FROM zeppelin.demo WHERE id='{{id=John}}'", intrContext);
+ "SELECT * FROM zeppelin.demo WHERE id='{{id=John}}'", intrContext);
- //Then
- assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo WHERE id='from_angular_registry'");
+ // Then
+ assertEquals("SELECT * FROM zeppelin.demo WHERE id='from_angular_registry'", actual);
verify(intrContext, never()).getGui();
}
@Test
public void should_error_if_incorrect_variable_definition() {
- //Given
-
- //When
- expectedException.expect(ParsingException.class);
- expectedException.expectMessage("Invalid bound variable definition for " +
- "'{{table?zeppelin.demo}}' in 'SELECT * FROM {{table?zeppelin.demo}} " +
- "WHERE id={{id='John'}}'. It should be of form 'variable=defaultValue'");
+ // Given
+
+ // When
+ ParsingException thrown = assertThrows(ParsingException.class, () -> {
+ // Then
+ helper.maybeExtractVariables("SELECT * FROM {{table?zeppelin.demo}} WHERE id={{id='John'}}",
+ intrContext);
+ });
+ assertEquals("Invalid bound variable definition for " +
+ "'{{table?zeppelin.demo}}' in 'SELECT * FROM {{table?zeppelin.demo}} " +
+ "WHERE id={{id='John'}}'. It should be of form 'variable=defaultValue' " +
+ "or 'variable=value1|value2|...|valueN'",
+ thrown.getMessage());
- //Then
- helper.maybeExtractVariables("SELECT * FROM {{table?zeppelin.demo}} WHERE id={{id='John'}}",
- intrContext);
}
@Test
- public void should_extract_consistency_option() {
- //Given
+ void should_extract_consistency_option() {
+ // Given
    List<QueryParameters> options = Arrays.asList(new Consistency(ALL),
- new Consistency(ONE));
+ new Consistency(ONE));
- //When
+ // When
final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options));
- //Then
- assertThat(actual.consistency().get()).isEqualTo(ALL);
+ // Then
+ assertEquals(ALL, actual.consistency().get());
}
@Test
- public void should_extract_serial_consistency_option() {
- //Given
+ void should_extract_serial_consistency_option() {
+ // Given
    List<QueryParameters> options = Arrays.asList(new SerialConsistency(SERIAL),
- new SerialConsistency(LOCAL_SERIAL));
+ new SerialConsistency(LOCAL_SERIAL));
- //When
+ // When
final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options));
- //Then
- assertThat(actual.serialConsistency().get()).isEqualTo(SERIAL);
+ // Then
+ assertEquals(SERIAL, actual.serialConsistency().get());
}
@Test
- public void should_extract_timestamp_option() {
- //Given
+ void should_extract_timestamp_option() {
+ // Given
    List<QueryParameters> options = Arrays.asList(new Timestamp(123L),
- new Timestamp(456L));
+ new Timestamp(456L));
- //When
+ // When
final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options));
- //Then
- assertThat(actual.timestamp().get()).isEqualTo(123L);
+ // Then
+ assertEquals(123L, actual.timestamp().get());
}
@Test
- public void should_extract_request_timeout_option() {
- //Given
+ void should_extract_request_timeout_option() {
+ // Given
    List<QueryParameters> options = Collections.singletonList(new RequestTimeOut(100));
- //When
+ // When
final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options));
- //Then
- assertThat(actual.requestTimeOut().get()).isEqualTo(100);
+ // Then
+ assertEquals(100, actual.requestTimeOut().get());
}
@Test
- public void should_generate_simple_statement() {
- //Given
+ void should_generate_simple_statement() {
+ // Given
String input = "SELECT * FROM users LIMIT 10;";
CassandraQueryOptions options = new CassandraQueryOptions(Option.apply(QUORUM),
- Option.empty(),
- Option.empty(),
- Option.empty(),
- Option.empty());
+ Option.empty(),
+ Option.empty(),
+ Option.empty(),
+ Option.empty());
- //When
+ // When
final SimpleStatement actual = helper.generateSimpleStatement(new SimpleStm(input), options,
- intrContext);
+ intrContext);
- //Then
- assertThat(actual).isNotNull();
- assertThat(actual.getQuery()).isEqualTo("SELECT * FROM users LIMIT 10;");
- assertThat(actual.getConsistencyLevel()).isSameAs(QUORUM);
+ // Then
+ assertNotNull(actual);
+ assertEquals("SELECT * FROM users LIMIT 10;", actual.getQuery());
+ assertSame(QUORUM, actual.getConsistencyLevel());
}
@Test
- public void should_generate_batch_statement() {
- //Given
+ void should_generate_batch_statement() {
+ // Given
SimpleStatement st1 = SimpleStatement.newInstance("SELECT * FROM users LIMIT 10;");
SimpleStatement st2 = SimpleStatement.newInstance("INSERT INTO users(id) VALUES(10);");
SimpleStatement st3 = SimpleStatement.newInstance(
- "UPDATE users SET name = 'John DOE' WHERE id=10;");
+ "UPDATE users SET name = 'John DOE' WHERE id=10;");
CassandraQueryOptions options = new CassandraQueryOptions(Option.apply(QUORUM),
- Option.empty(),
- Option.empty(),
- Option.empty(),
- Option.empty());
+ Option.empty(),
+ Option.empty(),
+ Option.empty(),
+ Option.empty());
- //When
+ // When
BatchStatement actual = helper.generateBatchStatement(UNLOGGED, options,
- toScalaList(asList(st1, st2, st3)));
+ toScalaList(asList(st1, st2, st3)));
- //Then
- assertThat(actual).isNotNull();
+ // Then
+ assertNotNull(actual);
    List<BatchableStatement<?>> statements = new ArrayList<>();
-    for (BatchableStatement<?> b: actual) {
+    for (BatchableStatement<?> b : actual) {
statements.add(b);
}
- assertThat(statements).hasSize(3);
- assertThat(statements.get(0)).isSameAs(st1);
- assertThat(statements.get(1)).isSameAs(st2);
- assertThat(statements.get(2)).isSameAs(st3);
- assertThat(actual.getConsistencyLevel()).isSameAs(QUORUM);
+ assertEquals(3, statements.size());
+ assertSame(st1, statements.get(0));
+ assertSame(st2, statements.get(1));
+ assertSame(st3, statements.get(2));
+ assertSame(QUORUM, actual.getConsistencyLevel());
}
@Test
- public void should_parse_bound_values() {
- //Given
+ void should_parse_bound_values() {
+ // Given
String bs = "'jdoe',32,'John DOE',null, true, '2014-06-12 34:00:34'";
- //When
+ // When
    final List<String> actual = this.toJavaList(helper.parseBoundValues("ps", bs));
- //Then
- assertThat(actual).containsExactly("'jdoe'", "32", "'John DOE'",
- "null", "true", "2014-06-12 34:00:34");
+ // Then
+ assertEquals("'jdoe'", actual.get(0));
+ assertEquals("32", actual.get(1));
+ assertEquals("'John DOE'", actual.get(2));
+ assertEquals("null", actual.get(3));
+ assertEquals("true", actual.get(4));
+ assertEquals("2014-06-12 34:00:34", actual.get(5));
}
@Test
- public void should_parse_simple_date() {
- //Given
+ void should_parse_simple_date() {
+ // Given
String dateString = "2015-07-30 12:00:01";
- //When
+ // When
final Instant actual = helper.parseDate(dateString);
- //Then
+ // Then
ZonedDateTime dt = actual.atZone(ZoneOffset.UTC);
- assertThat(dt.getLong(ChronoField.YEAR_OF_ERA)).isEqualTo(2015);
- assertThat(dt.getLong(ChronoField.MONTH_OF_YEAR)).isEqualTo(7);
- assertThat(dt.getLong(ChronoField.DAY_OF_MONTH)).isEqualTo(30);
- assertThat(dt.getLong(ChronoField.HOUR_OF_DAY)).isEqualTo(12);
- assertThat(dt.getLong(ChronoField.MINUTE_OF_HOUR)).isEqualTo(0);
- assertThat(dt.getLong(ChronoField.SECOND_OF_MINUTE)).isEqualTo(1);
+ assertEquals(2015, dt.getLong(ChronoField.YEAR_OF_ERA));
+ assertEquals(7, dt.getLong(ChronoField.MONTH_OF_YEAR));
+ assertEquals(30, dt.getLong(ChronoField.DAY_OF_MONTH));
+ assertEquals(12, dt.getLong(ChronoField.HOUR_OF_DAY));
+ assertEquals(0, dt.getLong(ChronoField.MINUTE_OF_HOUR));
+ assertEquals(1, dt.getLong(ChronoField.SECOND_OF_MINUTE));
}
@Test
- public void should_parse_accurate_date() {
- //Given
+ void should_parse_accurate_date() {
+ // Given
String dateString = "2015-07-30 12:00:01.123";
- //When
+ // When
final Instant actual = helper.parseDate(dateString);
- //Then
+ // Then
ZonedDateTime dt = actual.atZone(ZoneOffset.UTC);
- assertThat(dt.getLong(ChronoField.YEAR_OF_ERA)).isEqualTo(2015);
- assertThat(dt.getLong(ChronoField.MONTH_OF_YEAR)).isEqualTo(7);
- assertThat(dt.getLong(ChronoField.DAY_OF_MONTH)).isEqualTo(30);
- assertThat(dt.getLong(ChronoField.HOUR_OF_DAY)).isEqualTo(12);
- assertThat(dt.getLong(ChronoField.MINUTE_OF_HOUR)).isEqualTo(0);
- assertThat(dt.getLong(ChronoField.SECOND_OF_MINUTE)).isEqualTo(1);
- assertThat(dt.getLong(ChronoField.MILLI_OF_SECOND)).isEqualTo(123);
+ assertEquals(2015, dt.getLong(ChronoField.YEAR_OF_ERA));
+ assertEquals(7, dt.getLong(ChronoField.MONTH_OF_YEAR));
+ assertEquals(30, dt.getLong(ChronoField.DAY_OF_MONTH));
+ assertEquals(12, dt.getLong(ChronoField.HOUR_OF_DAY));
+ assertEquals(0, dt.getLong(ChronoField.MINUTE_OF_HOUR));
+ assertEquals(1, dt.getLong(ChronoField.SECOND_OF_MINUTE));
+ assertEquals(123, dt.getLong(ChronoField.MILLI_OF_SECOND));
}
- private scala.collection.immutable.List toScalaList(java.util.List list) {
+ private scala.collection.immutable.List toScalaList(java.util.List list) {
return scala.collection.JavaConversions.collectionAsScalaIterable(list).toList();
}
- private java.util.List toJavaList(scala.collection.immutable.List list){
+ private java.util.List toJavaList(scala.collection.immutable.List list) {
return scala.collection.JavaConversions.seqAsJavaList(list);
}
}
diff --git a/cassandra/src/test/resources/scalate/DescribeKeyspaces.html b/cassandra/src/test/resources/scalate/DescribeKeyspaces.html
index ac48bd34117..f06b9406c3f 100644
--- a/cassandra/src/test/resources/scalate/DescribeKeyspaces.html
+++ b/cassandra/src/test/resources/scalate/DescribeKeyspaces.html
@@ -1 +1 @@
-
\ No newline at end of file
diff --git a/cassandra/src/test/resources/scalate/DescribeTables.html b/cassandra/src/test/resources/scalate/DescribeTables.html
index 05992914d3c..cba4b82536e 100644
--- a/cassandra/src/test/resources/scalate/DescribeTables.html
+++ b/cassandra/src/test/resources/scalate/DescribeTables.html
@@ -1 +1 @@
-
\ No newline at end of file
diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala
index de14c880d55..b8ed391c7c8 100644
--- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala
+++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala
@@ -16,11 +16,11 @@
*/
package org.apache.zeppelin.cassandra
-import org.scalatest.{Matchers, BeforeAndAfterEach, FlatSpec}
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.flatspec._
+import org.scalatest.matchers.should.Matchers._
-class BoundValuesParserTest extends FlatSpec
-with BeforeAndAfterEach
-with Matchers {
+class BoundValuesParserTest extends AnyFlatSpec with BeforeAndAfterEach {
val parser = new BoundValuesParser
diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
index bf86227929c..3b0ae8f4656 100644
--- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
+++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala
@@ -23,13 +23,13 @@ import java.util.Properties
import com.datastax.oss.driver.api.core.`type`.DataTypes
import com.datastax.oss.driver.api.core.`type`.codec.registry.CodecRegistry
-import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.flatspec._
+import org.scalatest.matchers.should.Matchers._
import scala.collection.JavaConverters._
-class CqlFormatterTest extends FlatSpec
- with BeforeAndAfterEach
- with Matchers {
+class CqlFormatterTest extends AnyFlatSpec with BeforeAndAfterEach {
val longVal: java.lang.Long = java.lang.Long.valueOf(12345678901L)
val floatVal: java.lang.Float = java.lang.Float.valueOf(123.456789f)
diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala
index 006fc144b2d..2dcfe7e4e36 100644
--- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala
+++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala
@@ -18,9 +18,9 @@
package org.apache.zeppelin.cassandra
import com.datastax.oss.driver.api.core.cql.{BatchStatement, BatchType, SimpleStatement}
-import org.scalatest.FlatSpec
+import org.scalatest.flatspec._
-class EnhancedSessionTest extends FlatSpec {
+class EnhancedSessionTest extends AnyFlatSpec {
"Query" should "be detected as DDL for create" in {
assertResult(true){
diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala
index 19afafcbefe..794547f80f3 100644
--- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala
+++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala
@@ -19,19 +19,16 @@ package org.apache.zeppelin.cassandra
import com.datastax.oss.driver.api.core.{ConsistencyLevel, CqlSession}
import com.datastax.oss.driver.api.core.cql.{BatchType, PreparedStatement}
import org.apache.zeppelin.interpreter.InterpreterException
-import org.scalatest.mock.MockitoSugar
-import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
+import org.mockito.Mockito
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.flatspec._
+import org.scalatest.matchers.should.Matchers._
import org.apache.zeppelin.cassandra.ParagraphParser._
import org.apache.zeppelin.cassandra.TextBlockHierarchy._
-import scala.Option
+class ParagraphParserTest extends AnyFlatSpec with BeforeAndAfterEach {
-class ParagraphParserTest extends FlatSpec
- with BeforeAndAfterEach
- with Matchers
- with MockitoSugar {
-
- val session: CqlSession = mock[CqlSession]
+ val session: CqlSession = Mockito.mock[CqlSession](classOf[CqlSession])
val preparedStatements:collection.mutable.Map[String,PreparedStatement] = collection.mutable.Map()
val parser: ParagraphParser = new ParagraphParser()
diff --git a/conf/interpreter-list b/conf/interpreter-list
index 4be4d7ddd10..8897a48b60a 100644
--- a/conf/interpreter-list
+++ b/conf/interpreter-list
@@ -17,32 +17,20 @@
#
# [name] [maven artifact] [description]
-alluxio org.apache.zeppelin:zeppelin-alluxio:0.10.0 Alluxio interpreter
-angular org.apache.zeppelin:zeppelin-angular:0.10.0 HTML and AngularJS view rendering
-beam org.apache.zeppelin:zeppelin-beam:0.10.0 Beam interpreter
-bigquery org.apache.zeppelin:zeppelin-bigquery:0.10.0 BigQuery interpreter
-cassandra org.apache.zeppelin:zeppelin-cassandra:0.10.0 Cassandra interpreter
-elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.10.0 Elasticsearch interpreter
-file org.apache.zeppelin:zeppelin-file:0.10.0 HDFS file interpreter
-flink org.apache.zeppelin:zeppelin-flink:0.10.0 Flink interpreter
-geode org.apache.zeppelin:zeppelin-geode:0.10.0 Apache Geode interpreter
-groovy org.apache.zeppelin:zeppelin-groovy:0.10.0 Groovy interpreter
-hazelcastjet org.apache.zeppelin:zeppelin-hazelcastjet:0.10.0 Hazelcast Jet interpreter
-hbase org.apache.zeppelin:zeppelin-hbase:0.10.0 Hbase interpreter
-ignite org.apache.zeppelin:zeppelin-ignite:0.10.0 Ignite interpreter
-java org.apache.zeppelin:zeppelin-java:0.10.0 Java interpreter
-jdbc org.apache.zeppelin:zeppelin-jdbc:0.10.0 Jdbc interpreter
-kotlin org.apache.zeppelin:zeppelin-kotlin:0.10.0 Kotlin interpreter
-kylin org.apache.zeppelin:zeppelin-kylin:0.10.0 Kylin interpreter
-lens org.apache.zeppelin:zeppelin-lens:0.10.0 Lens interpreter
-livy org.apache.zeppelin:zeppelin-livy:0.10.0 Livy interpreter
-md org.apache.zeppelin:zeppelin-markdown:0.10.0 Markdown support
-neo4j org.apache.zeppelin:zeppelin-neo4j:0.10.0 Neo4j interpreter
-pig org.apache.zeppelin:zeppelin-pig:0.10.0 Pig interpreter
-python org.apache.zeppelin:zeppelin-python:0.10.0 Python interpreter
-sap org.apache.zeppelin:zeppelin-sap:0.10.0 SAP Support
-scalding org.apache.zeppelin:zeppelin-scalding_2.0.10:0.10.0 Scalding interpreter
-scio org.apache.zeppelin:zeppelin-scio:0.10.0 Scio interpreter
-shell org.apache.zeppelin:zeppelin-shell:0.10.0 Shell command
-sparql org.apache.zeppelin:zeppelin-sparql:0.10.0 Sparql interpreter
-submarine org.apache.zeppelin:zeppelin-submarine:0.10.0 Submarine interpreter
+alluxio org.apache.zeppelin:zeppelin-alluxio:0.12.0-SNAPSHOT Alluxio interpreter
+angular org.apache.zeppelin:zeppelin-angular:0.12.0-SNAPSHOT HTML and AngularJS view rendering
+bigquery org.apache.zeppelin:zeppelin-bigquery:0.12.0-SNAPSHOT BigQuery interpreter
+cassandra org.apache.zeppelin:zeppelin-cassandra:0.12.0-SNAPSHOT Cassandra interpreter
+elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.12.0-SNAPSHOT Elasticsearch interpreter
+file org.apache.zeppelin:zeppelin-file:0.12.0-SNAPSHOT HDFS file interpreter
+flink org.apache.zeppelin:zeppelin-flink:0.12.0-SNAPSHOT Flink interpreter
+groovy org.apache.zeppelin:zeppelin-groovy:0.12.0-SNAPSHOT Groovy interpreter
+hbase org.apache.zeppelin:zeppelin-hbase:0.12.0-SNAPSHOT Hbase interpreter
+java org.apache.zeppelin:zeppelin-java:0.12.0-SNAPSHOT Java interpreter
+jdbc org.apache.zeppelin:zeppelin-jdbc:0.12.0-SNAPSHOT Jdbc interpreter
+livy org.apache.zeppelin:zeppelin-livy:0.12.0-SNAPSHOT Livy interpreter
+md org.apache.zeppelin:zeppelin-markdown:0.12.0-SNAPSHOT Markdown support
+neo4j org.apache.zeppelin:zeppelin-neo4j:0.12.0-SNAPSHOT Neo4j interpreter
+python org.apache.zeppelin:zeppelin-python:0.12.0-SNAPSHOT Python interpreter
+shell org.apache.zeppelin:zeppelin-shell:0.12.0-SNAPSHOT Shell command
+sparql org.apache.zeppelin:zeppelin-sparql:0.12.0-SNAPSHOT Sparql interpreter
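For context, the names in the first column are what the interpreter installation helper consumes. A short sketch, assuming the standard `bin/install-interpreter.sh` script shipped with a binary distribution (interpreter names are taken from the list above):

```bash
# install selected interpreters by name; the helper resolves the
# corresponding maven artifacts from conf/interpreter-list
./bin/install-interpreter.sh --name md,jdbc,python
```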
diff --git a/conf/shiro.ini.template b/conf/shiro.ini.template
index 13db835a178..363b222334f 100644
--- a/conf/shiro.ini.template
+++ b/conf/shiro.ini.template
@@ -50,12 +50,6 @@ user3 = password4, role2
#pamRealm=org.apache.zeppelin.realm.PamRealm
#pamRealm.service=sshd
-### A sample for configuring ZeppelinHub Realm
-#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
-## Url of ZeppelinHub
-#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
-#securityManager.realms = $zeppelinHubRealm
-
## A same for configuring Knox SSO Realm
#knoxJwtRealm = org.apache.zeppelin.realm.jwt.KnoxJwtRealm
#knoxJwtRealm.providerUrl = https://domain.example.com/
diff --git a/conf/zeppelin-env.cmd.template b/conf/zeppelin-env.cmd.template
index 83b610e07a3..15c88fd4ca8 100644
--- a/conf/zeppelin-env.cmd.template
+++ b/conf/zeppelin-env.cmd.template
@@ -64,7 +64,7 @@ REM however, it is not encouraged when you can define SPARK_HOME
REM
REM Options read in YARN client mode
REM set HADOOP_CONF_DIR REM yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
-REM Pyspark (supported with Spark 1.2.1 and above)
+REM Pyspark (supported with Spark 3.3 and above)
REM To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
REM set PYSPARK_PYTHON REM path to the python command. must be the same path on the driver(Zeppelin) and all workers.
REM set PYTHONPATH
@@ -75,9 +75,3 @@ REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext
REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default.
REM set ZEPPELIN_SPARK_IMPORTIMPLICIT REM Import implicits, UDF collection, and sql if set true. true by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of Spark SQL result to display. 1000 by default.
-
-REM ZeppelinHub connection configuration
-REM
-REM set ZEPPELINHUB_API_ADDRESS REM Refers to the address of the ZeppelinHub service in use
-REM set ZEPPELINHUB_API_TOKEN REM Refers to the Zeppelin instance token of the user
-REM set ZEPPELINHUB_USER_KEY REM Optional, when using Zeppelin with authentication.
diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template
index 7c4a38b7cf7..e27a688becd 100644
--- a/conf/zeppelin-env.sh.template
+++ b/conf/zeppelin-env.sh.template
@@ -87,7 +87,7 @@
##
# Options read in YARN client mode
# export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
-# Pyspark (supported with Spark 1.2.1 and above)
+# Pyspark (supported with Spark 3.3 and above)
# To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
# export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers.
# export PYTHONPATH
@@ -107,11 +107,6 @@
# export HBASE_HOME= # (require) Under which HBase scripts and configuration should be
# export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
-#### ZeppelinHub connection configuration ####
-# export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use
-# export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user
-# export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication.
-
#### Zeppelin impersonation configuration
# export ZEPPELIN_IMPERSONATE_CMD # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 53b0144dcdc..b1143c35283 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -22,7 +22,7 @@
 <property>
   <name>zeppelin.server.addr</name>
   <value>127.0.0.1</value>
-  <description>Server binding address</description>
+  <description>Server binding address. If you cannot reach Zeppelin from your web browser on WSL or Windows, change 127.0.0.1 to 0.0.0.0. Note, however, that this can cause security issues when your machine is open to the public.</description>
 </property>
@@ -91,6 +91,12 @@
   <description>Enable collaborative mode</description>
 </property>
 
+<property>
+  <name>zeppelin.notebook.versioned.mode.enable</name>
+  <value>true</value>
+  <description>Enable/disable version control support in notes</description>
+</property>
+
-
-
-
@@ -426,8 +423,8 @@
 <property>
   <name>zeppelin.interpreter.connect.timeout</name>
-  <value>60000</value>
-  <description>Interpreter process connect timeout in msec.</description>
+  <value>600s</value>
+  <description>Interpreter process connect timeout. Default time unit is msec.</description>
 </property>
@@ -578,14 +575,14 @@
 <property>
   <name>zeppelin.interpreter.lifecyclemanager.timeout.checkinterval</name>
-  <value>60000</value>
-  <description>Milliseconds of the interval to checking whether interpreter is time out</description>
+  <value>1m</value>
+  <description>Interval for checking whether the interpreter has timed out</description>
 </property>
 
 <property>
   <name>zeppelin.interpreter.lifecyclemanager.timeout.threshold</name>
-  <value>3600000</value>
-  <description>Milliseconds of the interpreter timeout threshold, by default it is 1 hour</description>
+  <value>1h</value>
+  <description>Interpreter timeout threshold, by default it is 1 hour</description>
 </property>
 -->
diff --git a/dev/change_scala_version.sh b/dev/change_scala_version.sh
deleted file mode 100755
index 0ccfe7e263f..00000000000
--- a/dev/change_scala_version.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-set -e
-
-VALID_VERSIONS=( 2.10 2.11 )
-
-usage() {
- echo "Usage: $(basename $0) [-h|--help]
-where :
- -h| --help Display this help text
- valid version values : ${VALID_VERSIONS[*]}
-" 1>&2
- exit 1
-}
-
-if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then
- usage
-fi
-
-TO_VERSION="$1"
-
-check_scala_version() {
- for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done
- echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2
- exit 1
-}
-
-check_scala_version "${TO_VERSION}"
-
-if [ "${TO_VERSION}" = "2.11" ]; then
- FROM_VERSION="2.10"
- SCALA_LIB_VERSION="2.11.7"
-else
- FROM_VERSION="2.11"
- SCALA_LIB_VERSION="2.10.5"
-fi
-
-sed_i() {
- sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
-}
-
-export -f sed_i
-
-BASEDIR=$(dirname $0)/..
-find "${BASEDIR}" -name 'pom.xml' -not -path '*target*' -print \
- -exec bash -c "sed_i 's/\(artifactId.*\)_'${FROM_VERSION}'/\1_'${TO_VERSION}'/g' {}" \;
-
-# update in parent POM
-# Match any scala binary version to ensure idempotency
-sed_i '1,/[0-9]*\.[0-9]*[0-9]*\.[0-9]*'${TO_VERSION}'' \
- "${BASEDIR}/pom.xml"
-
-# update in parent POM
-# This is to make variables in leaf pom to be substituted to real value when flattened-pom is created.
-# maven-flatten plugin doesn't take properties defined under profile even if scala-2.11/scala-2.10 is activated via -Pscala-2.11/-Pscala-2.10,
-# and use default defined properties to create flatten pom.
-sed_i '1,/[0-9]*\.[0-9]*\.[0-9]*[0-9]*\.[0-9]*\.[0-9]*'${SCALA_LIB_VERSION}'' \
- "${BASEDIR}/pom.xml"
diff --git a/dev/change_zeppelin_version.sh b/dev/change_zeppelin_version.sh
index 0097e5b837f..0acebba586c 100755
--- a/dev/change_zeppelin_version.sh
+++ b/dev/change_zeppelin_version.sh
@@ -36,13 +36,13 @@ is_dev_version() {
else
return 1
fi
-}
+}
is_maintenance_version() {
local version="$1"
if [[ "${version}" == *"SNAPSHOT" ]]; then
version = $(echo ${1} | cut -d'-' -f 1)
- fi
+ fi
if [[ "${version}" == *".0" ]]; then
return 1
else
@@ -51,7 +51,7 @@ is_maintenance_version() {
}
# Change version in pom.xml
-mvn versions:set -DnewVersion="${TO_VERSION}" -DgenerateBackupPoms=false > /dev/null 2>&1
+./mvnw versions:set -DnewVersion="${TO_VERSION}" -DgenerateBackupPoms=false > /dev/null 2>&1
# Change version in example and package files
sed -i '' 's/-'"${FROM_VERSION}"'.jar",/-'"${TO_VERSION}"'.jar",/g' zeppelin-examples/zeppelin-example-clock/zeppelin-example-clock.json
@@ -61,6 +61,12 @@ sed -i '' 's/"version": "'"${FROM_VERSION}"'",/"version": "'"${TO_VERSION}"'",/g
# Change version in Dockerfile
sed -i '' 's/Z_VERSION="'"${FROM_VERSION}"'"/Z_VERSION="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin/bin/Dockerfile
+sed -i '' 's/version="'"${FROM_VERSION}"'"/version="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin-interpreter/Dockerfile
+sed -i '' 's/version="'"${FROM_VERSION}"'"/version="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin-server/Dockerfile
+
+# Change version in Kubernetes yaml
+sed -i '' 's/zeppelin-interpreter:'"${FROM_VERSION}"'/zeppelin-interpreter:'"${TO_VERSION}"'/g' k8s/zeppelin-server.yaml
+sed -i '' 's/zeppelin-server:'"${FROM_VERSION}"'/zeppelin-server:'"${TO_VERSION}"'/g' k8s/zeppelin-server.yaml
# Change docker image version in configuration
sed -i '' sed 's/zeppelin:'"${OLD_VERSION}"'/zeppelin:'"${NEW_VERSION}"'/g' conf/zeppelin-site.xml.template
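Assuming the script keeps the `<from-version> <to-version>` argument order implied by `FROM_VERSION`/`TO_VERSION` above, a typical invocation might look like the following (version numbers are purely illustrative):

```bash
# rewrite poms, example packages, Dockerfiles and the k8s manifest in one pass
dev/change_zeppelin_version.sh 0.12.0-SNAPSHOT 0.12.0
```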
diff --git a/dev/checkout_zeppelin_pr.sh b/dev/checkout_zeppelin_pr.sh
new file mode 100755
index 00000000000..08a1a00ec7e
--- /dev/null
+++ b/dev/checkout_zeppelin_pr.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# This utility creates a local branch PR_<PR_NUMBER> from the specified pull request,
+# to help the test and review.
+#
+# Prerequisites:
+# Add Apache Zeppelin as remote repo, with name "apache" (or something else
+# defined by environment variable APACHE_ZEPPELIN_REMOTE_REPO_NAME)
+#
+# git remote add apache git@github.com:apache/zeppelin.git
+#
+
+set -o pipefail
+set -e
+set -x
+
+APACHE_ZEPPELIN_REMOTE_REPO_NAME=${APACHE_ZEPPELIN_REMOTE_REPO_NAME:-"apache"}
+
+function usage {
+ echo "Usage: dev/checkout_zeppelin_pr.sh [-f] <PR_NUMBER>"
+ echo " -f force overwrite of local branch (default: fail if exists)"
+ exit 1
+}
+
+if [[ ${#} -eq 0 ]]; then
+ usage
+fi
+
+FORCE=""
+while getopts ":f" arg; do
+ case "${arg}" in
+ f)
+ FORCE="--force"
+ ;;
+ ?)
+ usage
+ ;;
+ esac
+done
+shift "$(($OPTIND -1))"
+
+PR_NUM=$1
+
+git fetch ${APACHE_ZEPPELIN_REMOTE_REPO_NAME} pull/${PR_NUM}/head:PR_${PR_NUM} ${FORCE}
+git checkout PR_${PR_NUM}
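+
+# Example (the PR number below is purely illustrative):
+#
+#   dev/checkout_zeppelin_pr.sh -f 4501
+#
+# fetches apache/zeppelin pull request #4501 into the local branch PR_4501,
+# overwriting that branch if it already exists.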
diff --git a/dev/common_release.sh b/dev/common_release.sh
index fda1700398a..0d39a589318 100644
--- a/dev/common_release.sh
+++ b/dev/common_release.sh
@@ -20,11 +20,18 @@
# common fucntions
if [[ -z "${TAR}" ]]; then
- TAR="/usr/bin/tar"
+ TAR="tar"
+ if [ "$(uname -s)" = "Darwin" ]; then
+ export COPYFILE_DISABLE=1
+ TAR="tar --no-mac-metadata --no-xattrs --no-fflags"
+ fi
fi
if [[ -z "${SHASUM}" ]]; then
- SHASUM="/usr/bin/shasum"
+ SHASUM="sha512sum"
+ if [ "$(uname -s)" = "Darwin" ]; then
+ SHASUM="shasum -a 512"
+ fi
fi
if [[ -z "${WORKING_DIR}" ]]; then
diff --git a/dev/create_release.sh b/dev/create_release.sh
index 536f3a5b96c..b5529169f6c 100755
--- a/dev/create_release.sh
+++ b/dev/create_release.sh
@@ -42,7 +42,6 @@ done
RELEASE_VERSION="$1"
GIT_TAG="$2"
-SCALA_VERSION="2.11"
function make_source_package() {
# create source package
@@ -65,16 +64,16 @@ function make_binary_release() {
cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
- echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
- mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
+ echo "./mvnw clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
+ ./mvnw clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
if [[ $? -ne 0 ]]; then
echo "Build failed. ${BUILD_FLAGS}"
exit 1
fi
# re-create package with proper dir name with binary license
- cd zeppelin-distribution/target/zeppelin-*
- mv zeppelin-* "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
+ cd zeppelin-distribution/target/zeppelin-${RELEASE_VERSION}-bin
+ mv zeppelin-${RELEASE_VERSION}-bin "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cat ../../src/bin_license/LICENSE >> "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/LICENSE"
cat ../../src/bin_license/NOTICE >> "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/NOTICE"
cp ../../src/bin_license/licenses/* "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/licenses/"
@@ -97,8 +96,10 @@ function make_binary_release() {
git_clone
make_source_package
-make_binary_release netinst "-Pweb-angular -Phadoop-2.6 -pl !beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb,!ksql,!scalding -am"
-make_binary_release all "-Pweb-angular -Phadoop-2.6"
+
+make_binary_release netinst "-Pweb-classic -pl !hbase,!jdbc,!file,!flink,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!java,!neo4j,!sparql,!mongodb,!shell -am"
+
+make_binary_release all "-Pweb-classic -pl !shell"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"
diff --git a/dev/publish_release.sh b/dev/publish_release.sh
index b389fd794e1..1b26253ca6c 100755
--- a/dev/publish_release.sh
+++ b/dev/publish_release.sh
@@ -46,7 +46,7 @@ if [[ $RELEASE_VERSION == *"SNAPSHOT"* ]]; then
DO_SNAPSHOT="yes"
fi
-PUBLISH_PROFILES="-Ppublish-distr -Phadoop-2.6 -Pweb-angular"
+PUBLISH_PROFILES="-Ppublish-distr -Pweb-classic"
PROJECT_OPTIONS="-pl !zeppelin-distribution -Dmaven.javadoc.skip=true"
NEXUS_STAGING="https://repository.apache.org/service/local/staging"
NEXUS_PROFILE="153446d1ac37c4"
@@ -84,20 +84,16 @@ function publish_snapshot_to_maven() {
tmp_repo="$(mktemp -d /tmp/zeppelin-repo-XXXXX)"
- mvn versions:set -DnewVersion=$RELEASE_VERSION
+ ./mvnw versions:set -DnewVersion=$RELEASE_VERSION
tmp_settings="tmp-settings.xml"
echo "" > $tmp_settings
echo "apache.snapshots.https$ASF_USERID" >> $tmp_settings
echo "$ASF_PASSWORD" >> $tmp_settings
echo "" >> $tmp_settings
- mvn --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \
+ ./mvnw --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -DskipTests \
$PUBLISH_PROFILES -Drat.skip=true deploy
- "${BASEDIR}/change_scala_version.sh" 2.11
- mvn -Pscala-2.11 --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \
- $PUBLISH_PROFILES -Drat.skip=true clean deploy
-
rm $tmp_settings
rm -rf $tmp_repo
}
@@ -106,7 +102,7 @@ function publish_to_maven() {
cd "${WORKING_DIR}/zeppelin"
# Force release version
- mvn versions:set -DnewVersion="${RELEASE_VERSION}"
+ ./mvnw versions:set -DnewVersion="${RELEASE_VERSION}"
# Using Nexus API documented here:
# https://support.sonatype.com/hc/en-us/articles/213465868-Uploading-to-a-Staging-Repository-via-REST-API
@@ -128,9 +124,9 @@ function publish_to_maven() {
rm -rf $HOME/.m2/repository/org/apache/zeppelin
# build with scala-2.11
- echo "mvn clean install -DskipTests \
+ echo "./mvnw clean install -DskipTests \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
- mvn clean install -DskipTests \
+ ./mvnw clean install -DskipTests \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
if [[ $? -ne 0 ]]; then
echo "Build failed."
diff --git a/dev/test_zeppelin_pr.py b/dev/test_zeppelin_pr.py
deleted file mode 100755
index 22602d0dd6d..00000000000
--- a/dev/test_zeppelin_pr.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#
-# This utility creates a local branch from specified pullrequest, to help the test and review
-# You'll need to run this utility from master branch with command
-#
-# dev/test_zeppelin_pr.py [#PR]
-#
-# then pr[#PR] branch will be created.
-#
-
-from __future__ import print_function
-import sys, os, subprocess, json, codecs
-
-if sys.version_info[0] == 2:
- from urllib import urlopen
-else:
- from urllib.request import urlopen
-
-if len(sys.argv) == 1:
- print("usage) " + sys.argv[0] + " [#PR]")
- print(" eg) " + sys.argv[0] + " 122")
- sys.exit(1)
-
-
-pr=sys.argv[1]
-githubApi="https://api.github.com/repos/apache/zeppelin"
-
-reader = codecs.getreader("utf-8")
-prInfo = json.load(reader(urlopen(githubApi + "/pulls/" + pr)))
-if "message" in prInfo and prInfo["message"] == "Not Found":
- sys.stderr.write("PullRequest #" + pr + " not found\n")
- sys.exit(1)
-
-prUser=prInfo['user']['login']
-prRepoUrl=prInfo['head']['repo']['clone_url']
-prBranch=prInfo['head']['label'].replace(":", "/")
-print(prBranch)
-
-# create local branch
-exitCode = os.system("git checkout -b pr" + pr)
-if exitCode != 0:
- sys.exit(1)
-
-# add remote repository and fetch
-exitCode = os.system("git remote remove " + prUser)
-exitCode = os.system("git remote add " + prUser + " " + prRepoUrl)
-if exitCode != 0:
- sys.stderr.write("Can not add remote repository.\n")
- sys.exit(1)
-
-exitCode = os.system("git fetch " + prUser)
-if exitCode != 0:
- sys.stderr.write("Can't fetch remote repository.\n")
- sys.exit(1)
-
-
-currentBranch = subprocess.check_output("git rev-parse --abbrev-ref HEAD", shell=True).rstrip().decode("utf-8")
-
-print("Merge branch " + prBranch + " into " + currentBranch)
-
-rev = subprocess.check_output("git rev-parse " + prBranch, shell=True).rstrip().decode("utf-8")
-prAuthor = subprocess.check_output("git --no-pager show -s --format=\"%an <%ae>\" " + rev, shell=True).rstrip().decode("utf-8")
-prAuthorDate = subprocess.check_output("git --no-pager show -s --format=\"%ad\" " + rev, shell=True).rstrip().decode("utf-8")
-
-prTitle = prInfo['title']
-prBody = prInfo['body']
-
-commitList = subprocess.check_output("git log --pretty=format:\"%h\" " + currentBranch + ".." + prBranch, shell=True).rstrip().decode("utf-8")
-authorList = []
-for commitHash in commitList.split("\n"):
- a = subprocess.check_output("git show -s --pretty=format:\"%an <%ae>\" "+commitHash, shell=True).rstrip().decode("utf-8")
- if a not in authorList:
- authorList.append(a)
-
-commitMsg = prTitle + "\n"
-if prBody :
- commitMsg += prBody + "\n\n"
-for author in authorList:
- commitMsg += "Author: " + author +"\n"
-commitMsg += "\n"
-commitMsg += "Closes #" + pr + " from " + prBranch + " and squashes the following commits:\n\n"
-commitMsg += subprocess.check_output("git log --pretty=format:\"%h [%an] %s\" " + currentBranch + ".." + prBranch, shell=True).rstrip().decode("utf-8")
-
-exitCode = os.system("git merge --no-commit --squash " + prBranch)
-if exitCode != 0:
- sys.stderr.write("Can not merge\n")
- sys.exit(1)
-
-exitCode = os.system('git commit -a --author "' + prAuthor + '" --date "' + prAuthorDate + '" -m"' + commitMsg + '"')
-if exitCode != 0:
- sys.stderr.write("Commit failed\n")
- sys.exit(1)
-
-os.system("git remote remove " + prUser)
-print("Branch " + prBranch + " is merged into " + currentBranch)
diff --git a/docs/README.md b/docs/README.md
index 7ca822edbdc..c9646e12957 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -42,7 +42,7 @@ bundle exec jekyll serve --watch
**Run locally using docker**
```
-docker run --rm -it \
+docker run --rm -it \
-v $PWD:/docs \
-w /docs \
-p '4000:4000' \
diff --git a/docs/_config.yml b/docs/_config.yml
index f1f3bb0f545..af2eaf85b84 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -21,7 +21,7 @@ author :
twitter : ASF
feedburner : feedname
-ZEPPELIN_VERSION : 0.10.0-SNAPSHOT
+ZEPPELIN_VERSION : 0.12.0-SNAPSHOT
# The production_url is only used when full-domain names are needed
# such as sitemap.txt
@@ -59,7 +59,7 @@ JB :
# - Only the following values are falsy: ["", null, false]
# - When setting BASE_PATH it must be a valid url.
# This means always setting the protocol (http|https) or prefixing with "/"
- BASE_PATH : /docs/0.10.0-SNAPSHOT
+ BASE_PATH : /docs/0.12.0-SNAPSHOT
# By default, the asset_path is automatically defined relative to BASE_PATH plus the enabled theme.
# ex: [BASE_PATH]/assets/themes/[THEME-NAME]
diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html
index 8bbf6b0034a..9305359fdfb 100644
--- a/docs/_includes/themes/zeppelin/_navigation.html
+++ b/docs/_includes/themes/zeppelin/_navigation.html
@@ -34,8 +34,10 @@
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif
index 3c827f4b6ec..dd4d1daf9be 100644
Binary files a/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif and b/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png b/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png
new file mode 100644
index 00000000000..6a2a6e9332e
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif
new file mode 100644
index 00000000000..aa53c5bab6f
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png b/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png
new file mode 100644
index 00000000000..6b6dcda493a
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png
new file mode 100644
index 00000000000..6d866ac0d62
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png
new file mode 100644
index 00000000000..9f8e2f44daf
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png
new file mode 100644
index 00000000000..5eaa4acf44e
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png
new file mode 100644
index 00000000000..260686c8a0f
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png b/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png
new file mode 100644
index 00000000000..4b1168b88cb
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png b/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png
new file mode 100644
index 00000000000..c5431b4d90e
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif
index fe7e2e92923..29e38200644 100644
Binary files a/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif and b/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png
deleted file mode 100644
index 9f6d5ab3fd0..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png
deleted file mode 100644
index feec0ccf396..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png
deleted file mode 100644
index 97a63e80967..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png
deleted file mode 100644
index ffa1c260bf3..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png
deleted file mode 100644
index 9f43bd29285..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif b/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif
new file mode 100644
index 00000000000..e891ceb5af1
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png b/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png
new file mode 100644
index 00000000000..960e6b0e48d
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png b/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png
new file mode 100644
index 00000000000..ce654f39b35
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif b/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif
new file mode 100644
index 00000000000..21c243241e0
Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png b/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png
deleted file mode 100644
index f925d47c17e..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png b/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png
deleted file mode 100644
index 0eaa063d608..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png b/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png
deleted file mode 100644
index 8a867ccecb4..00000000000
Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png and /dev/null differ
diff --git a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png b/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png
deleted file mode 100644
index b90b982e1d8..00000000000
Binary files a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png and /dev/null differ
diff --git a/docs/development/contribution/how_to_contribute_code.md b/docs/development/contribution/how_to_contribute_code.md
index 3683428f985..32543c0797a 100644
--- a/docs/development/contribution/how_to_contribute_code.md
+++ b/docs/development/contribution/how_to_contribute_code.md
@@ -70,19 +70,19 @@ Before making a pull request, please take a look [Contribution Guidelines](http:
### Build
```bash
-mvn install
+./mvnw install
```
To skip test
```bash
-mvn install -DskipTests
+./mvnw install -DskipTests
```
To build with specific spark / hadoop version
```bash
-mvn install -Dspark.version=x.x.x -Dhadoop.version=x.x.x
+./mvnw install -Dspark.version=x.x.x -Dhadoop.version=x.x.x
```
For the further
@@ -97,7 +97,7 @@ For the further
```bash
cd zeppelin-server
HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME \
-mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args=""
+./mvnw exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args=""
```
#### Option 2 - Daemon Script
@@ -105,7 +105,7 @@ mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexe
> **Note:** Make sure you first run
```bash
-mvn clean install -DskipTests
+./mvnw clean install -DskipTests
```
in your zeppelin root directory, otherwise your server build will fail to find the required dependencies in the local repro.
@@ -137,17 +137,17 @@ cd /zeppelin-interpreter/src/main/thrift
### Run Selenium test
-Zeppelin has [set of integration tests](https://github.com/apache/zeppelin/tree/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration) using Selenium. To run these test, first build and run Zeppelin and make sure Zeppelin is running on port 8080. Then you can run test using following command
+Zeppelin has a [set of integration tests](https://github.com/apache/zeppelin/tree/master/zeppelin-integration/src/test/java/org/apache/zeppelin/integration) using Selenium. To run these tests, first build and run Zeppelin and make sure it is running on port 8080. Then you can run the tests using the following command:
```bash
-TEST_SELENIUM=true mvn test -Dtest=[TEST_NAME] -DfailIfNoTests=false \
+TEST_SELENIUM=true ./mvnw test -Dtest=[TEST_NAME] -DfailIfNoTests=false \
-pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server'
```
-For example, to run [ParagraphActionIT](https://github.com/apache/zeppelin/blob/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration/ParagraphActionsIT.java),
+For example, to run [ParagraphActionsIT](https://github.com/apache/zeppelin/blob/master/zeppelin-integration/src/test/java/org/apache/zeppelin/integration/ParagraphActionsIT.java),
```bash
-TEST_SELENIUM=true mvn test -Dtest=ParagraphActionsIT -DfailIfNoTests=false \
+TEST_SELENIUM=true ./mvnw test -Dtest=ParagraphActionsIT -DfailIfNoTests=false \
-pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server'
```
diff --git a/docs/development/contribution/useful_developer_tools.md b/docs/development/contribution/useful_developer_tools.md
index 17ca40307f5..47f3a84dc61 100644
--- a/docs/development/contribution/useful_developer_tools.md
+++ b/docs/development/contribution/useful_developer_tools.md
@@ -61,28 +61,27 @@ you can use this function like `setjdk 1.8` / `setjdk 1.7`
```bash
# build `zeppelin-web` only
-mvn clean -pl 'zeppelin-web' package -DskipTests;
+./mvnw clean -pl 'zeppelin-web' package -DskipTests;
# build `zeppelin-server` and its dependencies only
-mvn clean package -pl 'spark,spark-dependencies,python,markdown,zeppelin-server' --am -DskipTests
+./mvnw clean package -pl 'spark,spark-dependencies,python,markdown,zeppelin-server' --am -DskipTests
-# build spark related modules with default profiles: scala 2.10
-mvn clean package -pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests
+# build spark related modules with default profiles
+./mvnw clean package -pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests
-# build spark related modules with profiles: scala 2.11, spark 2.1 hadoop 2.7
-./dev/change_scala_version.sh 2.11
-mvn clean package -Pspark-2.1 -Phadoop-2.7 -Pscala-2.11 \
+# build spark related modules with profiles: scala 2.13, spark 3.5 hadoop 3.3
+./mvnw clean package -Pspark-scala-2.13 -Pspark-3.5 -Phadoop-3.3 \
-pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests
# build `zeppelin-server` and `markdown` with dependencies
-mvn clean package -pl 'markdown,zeppelin-server' --am -DskipTests
+./mvnw clean package -pl 'markdown,zeppelin-server' --am -DskipTests
```
### Running Individual Tests
```bash
# run the `HeliumBundleFactoryTest` test class
-mvn test -pl 'zeppelin-server' --am -DfailIfNoTests=false -Dtest=HeliumBundleFactoryTest
+./mvnw test -pl 'zeppelin-server' --am -DfailIfNoTests=false -Dtest=HeliumBundleFactoryTest
```
### Running Selenium Tests
@@ -91,12 +90,12 @@ Make sure that Zeppelin instance is started to execute integration tests (= sele
```bash
# run the `SparkParagraphIT` test class
-TEST_SELENIUM="true" mvn test -pl 'zeppelin-server' --am \
+TEST_SELENIUM="true" ./mvnw test -pl 'zeppelin-server' --am \
-DfailIfNoTests=false -Dtest=SparkParagraphIT
# run the `testSqlSpark` test function only in the `SparkParagraphIT` class
# but note that, some test might be dependent on the previous tests
-TEST_SELENIUM="true" mvn test -pl 'zeppelin-server' --am \
+TEST_SELENIUM="true" ./mvnw test -pl 'zeppelin-server' --am \
-DfailIfNoTests=false -Dtest=SparkParagraphIT#testSqlSpark
```
diff --git a/docs/development/helium/overview.md b/docs/development/helium/overview.md
index 08a401af462..92da7e8a932 100644
--- a/docs/development/helium/overview.md
+++ b/docs/development/helium/overview.md
@@ -40,4 +40,4 @@ Currently, Helium supports 4 types of package.
## Configuration
Zeppelin ships with several builtin helium plugins which is located in $ZEPPELIN_HOME/heliums. If you want to try more types of heliums plugins,
-you can configure `zeppelin.helium.registry` to be `helium,https://s3.amazonaws.com/helium-package/helium.json` in zeppelin-site.xml. `https://s3.amazonaws.com/helium-package/helium.json` will be updated regularly.
+you can configure `zeppelin.helium.registry` to be `helium,https://zeppelin.apache.org/helium.json` in zeppelin-site.xml. `https://zeppelin.apache.org/helium.json` will be updated regularly.
diff --git a/docs/development/writing_zeppelin_interpreter.md b/docs/development/writing_zeppelin_interpreter.md
index 33ecee1631e..fa4970a293c 100644
--- a/docs/development/writing_zeppelin_interpreter.md
+++ b/docs/development/writing_zeppelin_interpreter.md
@@ -236,7 +236,7 @@ To configure your interpreter you need to follow these steps:
2. In the interpreter page, click the `+Create` button and configure your interpreter properties.
Now you are done and ready to use your interpreter.
-> **Note :** Interpreters released with zeppelin have a [default configuration](https://github.com/apache/zeppelin/blob/master/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java#L397) which is used when there is no `conf/zeppelin-site.xml`.
+> **Note :** Interpreters released with zeppelin have a [default configuration](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java#L928) which is used when there is no `conf/zeppelin-site.xml`.
## Use your interpreter
diff --git a/docs/index.md b/docs/index.md
index f1cf29e5138..75d362ca868 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -111,7 +111,6 @@ limitations under the License.
* [S3 Storage](./setup/storage/storage.html#notebook-storage-in-s3)
* [Azure Storage](./setup/storage/storage.html#notebook-storage-in-azure)
* [Google Cloud Storage](./setup/storage/storage.html#notebook-storage-in-gcs)
- * [ZeppelinHub Storage](./setup/storage/storage.html#notebook-storage-in-zeppelinhub)
* [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb)
* Operation
* [Configuration](./setup/operation/configuration.html): lists for Apache Zeppelin
@@ -136,42 +135,29 @@ limitations under the License.
#### Available Interpreters
* [Alluxio](./interpreter/alluxio.html)
- * [Beam](./interpreter/beam.html)
* [BigQuery](./interpreter/bigquery.html)
* [Cassandra](./interpreter/cassandra.html)
* [Elasticsearch](./interpreter/elasticsearch.html)
* [Flink](./interpreter/flink.html)
- * [Geode](./interpreter/geode.html)
* [Groovy](./interpreter/groovy.html)
- * [Hazelcast Jet](./interpreter/hazelcastjet.html)
* [HBase](./interpreter/hbase.html)
* [HDFS](./interpreter/hdfs.html)
* [Hive](./interpreter/hive.html)
- * [Ignite](./interpreter/ignite.html)
* [influxDB](./interpreter/influxdb.html)
* [Java](./interpreter/java.html)
* [JDBC](./interpreter/jdbc.html)
* [Jupyter](./interpreter/jupyter.html)
- * [Kotlin](./interpreter/kotlin.html)
- * [KSQL](./interpreter/ksql.html)
- * [Kylin](./interpreter/kylin.html)
- * [Lens](./interpreter/lens.html)
* [Livy](./interpreter/livy.html)
* [Mahout](./interpreter/mahout.html)
* [Markdown](./interpreter/markdown.html)
* [MongoDB](./interpreter/mongodb.html)
* [Neo4j](./interpreter/neo4j.html)
- * [Pig](./interpreter/pig.html)
* [Postgresql, HAWQ](./interpreter/postgresql.html)
* [Python](./interpreter/python.html)
* [R](./interpreter/r.html)
- * [SAP](./interpreter/sap.html)
- * [Scalding](./interpreter/scalding.html)
- * [Scio](./interpreter/scio.html)
* [Shell](./interpreter/shell.html)
* [Spark](./interpreter/spark.html)
* [Sparql](./interpreter/sparql.html)
- * [Submarine](./interpreter/submarine.html)
#### External Resources
* [Mailing List](https://zeppelin.apache.org/community.html)
diff --git a/docs/interpreter/beam.md b/docs/interpreter/beam.md
deleted file mode 100644
index d992b8ee5b5..00000000000
--- a/docs/interpreter/beam.md
+++ /dev/null
@@ -1,116 +0,0 @@
----
-layout: page
-title: Beam interpreter in Apache Zeppelin
-description: Apache Beam is an open source, unified programming model that you can use to create a data processing pipeline.
-group: interpreter
----
-
-
-{% include JB/setup %}
-
-# Beam interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Apache Beam](http://beam.incubator.apache.org) is an open source unified platform for data processing pipelines. A pipeline can be build using one of the Beam SDKs.
-The execution of the pipeline is done by different Runners. Currently, Beam supports Apache Flink Runner, Apache Spark Runner, and Google Dataflow Runner.
-
-## How to use
-Basically, you can write normal Beam java code where you can determine the Runner. You should write the main method inside a class becuase the interpreter invoke this main to execute the pipeline. Unlike Zeppelin normal pattern, each paragraph is considered as a separate job, there isn't any relation to any other paragraph.
-
-The following is a demonstration of a word count example with data represented in array of strings
-But it can read data from files by replacing `Create.of(SENTENCES).withCoder(StringUtf8Coder.of())` with `TextIO.Read.from("path/to/filename.txt")`
-
-```java
-%beam
-
-// most used imports
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.transforms.Create;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-import java.util.ArrayList;
-import org.apache.beam.runners.direct.*;
-import org.apache.beam.sdk.runners.*;
-import org.apache.beam.sdk.options.*;
-import org.apache.beam.runners.flink.*;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-public class MinimalWordCount {
- static List s = new ArrayList<>();
-
- static final String[] SENTENCES_ARRAY = new String[] {
- "Hadoop is the Elephant King!",
- "A yellow and elegant thing.",
- "He never forgets",
- "Useful data, or lets",
- "An extraneous element cling!",
- "A wonderful king is Hadoop.",
- "The elephant plays well with Sqoop.",
- "But what helps him to thrive",
- "Are Impala, and Hive,",
- "And HDFS in the group.",
- "Hadoop is an elegant fellow.",
- "An elephant gentle and mellow.",
- "He never gets mad,",
- "Or does anything bad,",
- "Because, at his core, he is yellow",
- };
- static final List SENTENCES = Arrays.asList(SENTENCES_ARRAY);
- public static void main(String[] args) {
- PipelineOptions options = PipelineOptionsFactory.create().as(PipelineOptions.class);
- options.setRunner(FlinkRunner.class);
- Pipeline p = Pipeline.create(options);
- p.apply(Create.of(SENTENCES).withCoder(StringUtf8Coder.of()))
- .apply("ExtractWords", ParDo.of(new DoFn() {
- @ProcessElement
- public void processElement(ProcessContext c) {
- for (String word : c.element().split("[^a-zA-Z']+")) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }))
- .apply(Count. perElement())
- .apply("FormatResults", ParDo.of(new DoFn, String>() {
- @ProcessElement
- public void processElement(DoFn, String>.ProcessContext arg0)
- throws Exception {
- s.add("\n" + arg0.element().getKey() + "\t" + arg0.element().getValue());
- }
- }));
- p.run();
- System.out.println("%table word\tcount");
- for (int i = 0; i < s.size(); i++) {
- System.out.print(s.get(i));
- }
-
- }
-}
-
-```
-
diff --git a/docs/interpreter/bigquery.md b/docs/interpreter/bigquery.md
index cdac762f6db..da696a74f2e 100644
--- a/docs/interpreter/bigquery.md
+++ b/docs/interpreter/bigquery.md
@@ -53,6 +53,11 @@ limitations under the License.
BigQuery SQL dialect (standardSQL or legacySQL). If empty, [query prefix](https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql#sql-prefix) like '#standardSQL' can be used.
+  <tr>
+    <td>zeppelin.bigquery.region</td>
+    <td></td>
+    <td>BigQuery dataset region (Needed for single region dataset)</td>
+  </tr>
@@ -68,7 +73,7 @@ In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon a
Within Google Cloud Platform (e.g. Google App Engine, Google Compute Engine),
built-in credentials are used by default.
-Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/storage/storage.html#notebook-storage-in-gcs)
+Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/setup/storage/storage.html#notebook-storage-in-google-cloud-storage)
## Using the BigQuery Interpreter
diff --git a/docs/interpreter/cassandra.md b/docs/interpreter/cassandra.md
index 0de7b51b1f8..a49ae7e2421 100644
--- a/docs/interpreter/cassandra.md
+++ b/docs/interpreter/cassandra.md
@@ -163,7 +163,7 @@ The complete list of all CQL statements and versions can be found below:
-
http://docs.datastax.com/en/cql/3.1/cql/cql_intro_c.html
diff --git a/docs/interpreter/flink.md b/docs/interpreter/flink.md
index 01ea99e5af7..df272cbdb9b 100644
--- a/docs/interpreter/flink.md
+++ b/docs/interpreter/flink.md
@@ -24,9 +24,10 @@ limitations under the License.
## Overview
-[Apache Flink](https://flink.apache.org) is an open source platform for distributed stream and batch data processing. Flink’s core is a streaming dataflow engine that provides data distribution, communication, and fault tolerance for distributed computations over data streams. Flink also builds batch processing on top of the streaming engine, overlaying native iteration support, managed memory, and program optimization.
+[Apache Flink](https://flink.apache.org) is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams.
+Flink has been designed to run in all common cluster environments, perform computations at in-memory speed and at any scale.
-In Zeppelin 0.9, we refactor the Flink interpreter in Zeppelin to support the latest version of Flink. **Only Flink 1.10+ is supported, old versions of flink won't work.**
+In Zeppelin 0.9, we refactored the Flink interpreter to support the latest version of Flink. **Currently, only Flink 1.15+ is supported; older versions of Flink won't work.**
Apache Flink is supported in Zeppelin with the Flink interpreter group which consists of the five interpreters listed below.
@@ -62,13 +63,111 @@ Apache Flink is supported in Zeppelin with the Flink interpreter group which con
+## Main Features
+
+<table class="table-configuration">
+  <tr>
+    <th>Feature</th>
+    <th>Description</th>
+  </tr>
+  <tr>
+    <td>Support multiple versions of Flink</td>
+    <td>You can run different versions of Flink in one Zeppelin instance</td>
+  </tr>
+  <tr>
+    <td>Support multiple languages</td>
+    <td>Scala, Python, SQL are supported, besides that you can also collaborate across languages, e.g. you can write Scala UDF and use it in PyFlink</td>
+  </tr>
+  <tr>
+    <td>Support multiple execution modes</td>
+    <td>Local | Remote | Yarn | Yarn Application</td>
+  </tr>
+  <tr>
+    <td>Support Hive</td>
+    <td>Hive catalog is supported</td>
+  </tr>
+  <tr>
+    <td>Interactive development</td>
+    <td>Interactive development user experience increases your productivity</td>
+  </tr>
+  <tr>
+    <td>Enhancement on Flink SQL</td>
+    <td>
+      * Support both streaming sql and batch sql in one notebook<br/>
+      * Support sql comment (single line comment/multiple line comment)<br/>
+      * Support advanced configuration (jobName, parallelism)<br/>
+      * Support multiple insert statements
+    </td>
+  </tr>
+  <tr>
+    <td>Multi-tenancy</td>
+    <td>Multiple users can work in one Zeppelin instance without affecting each other.</td>
+  </tr>
+  <tr>
+    <td>Rest API Support</td>
+    <td>You can not only submit Flink jobs via the Zeppelin notebook UI, but also via its rest api (you can use Zeppelin as a Flink job server).</td>
+  </tr>
+</table>
+
+## Play Flink in Zeppelin docker
+
+For beginners, we suggest playing with Flink in the Zeppelin docker container.
+First you need to download Flink, because there's no Flink binary distribution shipped with Zeppelin.
+e.g. Here we download Flink 1.12.2 to `/mnt/disk1/flink-1.12.2`,
+mount it into the Zeppelin docker container, and run the following command to start the Zeppelin docker container.
+
+```bash
+docker run -u $(id -u) -p 8080:8080 -p 8081:8081 --rm -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink --name zeppelin apache/zeppelin:0.10.0
+```
+
+After running the above command, you can open `http://localhost:8080` to play with Flink in Zeppelin. Only Flink local mode has been verified in the Zeppelin docker container; other modes may not work due to network issues.
+`-p 8081:8081` exposes the Flink web UI, so that you can access it via `http://localhost:8081`.
+
+Here's a screenshot of running the note `Flink Tutorial/5. Streaming Data Analytics`:
+
+
+
+
+You can also mount a notebook folder to replace the built-in Zeppelin tutorial notebooks.
+e.g. Here's a repo of Flink sql cookbook on Zeppelin: [https://github.com/zjffdu/flink-sql-cookbook-on-zeppelin/](https://github.com/zjffdu/flink-sql-cookbook-on-zeppelin/)
+
+You can clone this repo and mount it into the docker container:
+
+```
+docker run -u $(id -u) -p 8080:8080 --rm -v /mnt/disk1/flink-sql-cookbook-on-zeppelin:/notebook -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink -e ZEPPELIN_NOTEBOOK_DIR='/notebook' --name zeppelin apache/zeppelin:0.10.0
+```
+
## Prerequisites
-* Download Flink 1.10 for scala 2.11 (Only scala-2.11 is supported, scala-2.12 is not supported yet in Zeppelin)
+Download Flink 1.15 or later (only Scala 2.12 is supported).
+
+### Version-specific notes for Flink
+
+Flink 1.15 is Scala-free and has changed its binary distribution, so the following extra steps are required (a command sketch follows this list):
+* Move FLINK_HOME/opt/flink-table-planner_2.12-1.15.0.jar to FLINK_HOME/lib
+* Move FLINK_HOME/lib/flink-table-planner-loader-1.15.0.jar to FLINK_HOME/opt
+* Download flink-table-api-scala-bridge_2.12-1.15.0.jar and flink-table-api-scala_2.12-1.15.0.jar to FLINK_HOME/lib
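+
+A minimal sketch of these steps, assuming `FLINK_HOME` points at an unpacked Flink 1.15.0 distribution (adjust the version suffix to match your download):
+
+```bash
+mv $FLINK_HOME/opt/flink-table-planner_2.12-1.15.0.jar $FLINK_HOME/lib/
+mv $FLINK_HOME/lib/flink-table-planner-loader-1.15.0.jar $FLINK_HOME/opt/
+# download these two jars (e.g. from Maven Central) into FLINK_HOME/lib:
+#   flink-table-api-scala-bridge_2.12-1.15.0.jar
+#   flink-table-api-scala_2.12-1.15.0.jar
+```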
+
+Flink 1.16 introduces a new `ClientResourceManager` for the SQL client; you need to move `FLINK_HOME/opt/flink-sql-client-1.16.0.jar` to `FLINK_HOME/lib`.
+
+## Flink on Zeppelin Architecture
+
+
+
+The above diagram shows the architecture of Flink on Zeppelin. The Flink interpreter on the left side is actually a Flink client
+that is responsible for compiling and managing the Flink job lifecycle, such as submitting and cancelling jobs,
+monitoring job progress, and so on. The Flink cluster on the right side is where the Flink jobs are executed.
+It could be a MiniCluster (local mode), a Standalone cluster (remote mode),
+a Yarn session cluster (yarn mode) or a Yarn application cluster (yarn-application mode).
+
+There are 2 important components in the Flink interpreter: the Scala shell and the Python shell.
+
+* The Scala shell is the entry point of the Flink interpreter. It creates all the entry points of a Flink program, such as ExecutionEnvironment, StreamExecutionEnvironment and TableEnvironment, and is responsible for compiling and running Scala code and SQL.
+* The Python shell is the entry point of PyFlink and is responsible for compiling and running Python code.
## Configuration
+
The Flink interpreter can be configured with properties provided by Zeppelin (as following table).
-You can also add and set other flink properties which are not listed in the table. For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-master/ops/config.html).
+You can also add and set other Flink properties which are not listed in the table. For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-master/ops/config.html).
Property
@@ -78,7 +177,7 @@ You can also add and set other flink properties which are not listed in the tabl
`FLINK_HOME`
-
Location of flink installation. It is must be specified, otherwise you can not use flink in Zeppelin
+
Location of Flink installation. It is must be specified, otherwise you can not use Flink in Zeppelin
`HADOOP_CONF_DIR`
@@ -93,7 +192,7 @@ You can also add and set other flink properties which are not listed in the tabl
flink.execution.mode
local
-
Execution mode of flink, e.g. local | yarn | remote
+
Execution mode of Flink, e.g. local | remote | yarn | yarn-application
flink.execution.remote.host
@@ -108,12 +207,12 @@ You can also add and set other flink properties which are not listed in the tabl
jobmanager.memory.process.size
1024m
-
Total number of memory of JobManager, e.g. 1024m. It is official [flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/)
+
Total memory size of JobManager, e.g. 1024m. It is official [Flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/)
taskmanager.memory.process.size
1024m
-
Total number of memory of TaskManager, e.g. 1024m. It is official [flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/)
+
Total memory size of TaskManager, e.g. 1024m. It is official [Flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/)
taskmanager.numberOfTaskSlots
@@ -138,32 +237,32 @@ You can also add and set other flink properties which are not listed in the tabl
zeppelin.flink.uiWebUrl
-
User specified Flink JobManager url, it could be used in remote mode where Flink cluster is already started, or could be used as url template, e.g. https://knox-server:8443/gateway/cluster-topo/yarn/proxy/{{applicationId}}/ where {{applicationId}} would be replaced with yarn app id
+
User specified Flink JobManager url, it could be used in remote mode where Flink cluster is already started, or could be used as url template, e.g. https://knox-server:8443/gateway/cluster-topo/yarn/proxy/{% raw %}{{applicationId}}{% endraw %}/ where {% raw %}{{applicationId}}{% endraw %} is placeholder of yarn app id
zeppelin.flink.run.asLoginUser
true
-
Whether run flink job as the zeppelin login user, it is only applied when running flink job in hadoop yarn cluster and shiro is enabled
+
Whether run Flink job as the Zeppelin login user, it is only applied when running Flink job in hadoop yarn cluster and shiro is enabled
flink.udf.jars
-
Flink udf jars (comma separated), zeppelin will register udf in this jar automatically for user. These udf jars could be either local files or hdfs files if you have hadoop installed. The udf name is the class name.
+
Flink udf jars (comma separated), Zeppelin will register udf in these jars automatically for user. These udf jars could be either local files or hdfs files if you have hadoop installed. The udf name is the class name.
flink.udf.jars.packages
-
Packages (comma separated) that would be searched for the udf defined in `flink.udf.jars`.
+
Packages (comma separated) that would be searched for the udf defined in `flink.udf.jars`. Specifying this can reduce the number of classes to scan, otherwise all the classes in udf jar will be scanned.
flink.execution.jars
-
Additional user jars (comma separated), these jars could be either local files or hdfs files if you have hadoop installed.
+
Additional user jars (comma separated), these jars could be either local files or hdfs files if you have hadoop installed. It can be used to specify Flink connector jars or udf jars (no udf class auto-registration like `flink.udf.jars`)
flink.execution.packages
-
Additional user packages (comma separated), e.g. org.apache.flink:flink-connector-kafka_2.11:1.10,org.apache.flink:flink-connector-kafka-base_2.11:1.10.0,org.apache.flink:flink-json:1.10.0
+
Additional user packages (comma separated), e.g. `org.apache.flink:flink-json:1.10.0`
zeppelin.flink.concurrentBatchSql.max
@@ -183,14 +282,18 @@ You can also add and set other flink properties which are not listed in the tabl
table.exec.resource.default-parallelism
1
-
Default parallelism for flink sql job
+
Default parallelism for Flink sql job
zeppelin.flink.scala.color
true
-
Whether display scala shell output in colorful format
+
Whether to display Scala shell output in colorful format
+
+
+
zeppelin.flink.scala.shell.tmp_dir
+
+
Temp folder for storing scala shell compiled jar
-
zeppelin.flink.enableHive
false
@@ -198,171 +301,333 @@ You can also add and set other flink properties which are not listed in the tabl
zeppelin.flink.hive.version
-
2.3.4
+
2.3.7
Hive version that you would like to connect
zeppelin.flink.module.enableHive
false
-
Whether enable hive module, hive udf take precedence over flink udf if hive module is enabled.
+
Whether to enable the hive module. Hive udfs take precedence over Flink udfs if the hive module is enabled.
zeppelin.flink.maxResult
1000
max number of row returned by sql interpreter
+
+
`zeppelin.flink.job.check_interval`
+
1000
+
Check interval (in milliseconds) to check Flink job progress
+
`flink.interpreter.close.shutdown_cluster`
true
-
Whether shutdown application when closing interpreter
+
Whether to shut down the Flink cluster when closing the interpreter
`zeppelin.interpreter.close.cancel_job`
true
-
Whether cancel flink job when closing interpreter
-
-
-
`zeppelin.flink.job.check_interval`
-
1000
-
Check interval (in milliseconds) to check flink job progress
+
Whether to cancel the Flink job when closing the interpreter
-## StreamExecutionEnvironment, ExecutionEnvironment, StreamTableEnvironment, BatchTableEnvironment
+## Interpreter Binding Mode
-Zeppelin will create 6 variables as flink scala (`%flink`) entry point:
+The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same Flink interpreter, and hence the same Flink cluster.
+In practice, we would recommend you to use `isolated per note`, which means each note has its own Flink interpreter (and thus its own Flink cluster) without affecting the others.
-* `senv` (StreamExecutionEnvironment),
-* `benv` (ExecutionEnvironment)
-* `stenv` (StreamTableEnvironment for blink planner)
-* `btenv` (BatchTableEnvironment for blink planner)
-* `stenv_2` (StreamTableEnvironment for flink planner)
-* `btenv_2` (BatchTableEnvironment for flink planner)
-And will create 6 variables as pyflink (`%flink.pyflink` or `%flink.ipyflink`) entry point:
+## Execution Mode
-* `s_env` (StreamExecutionEnvironment),
-* `b_env` (ExecutionEnvironment)
-* `st_env` (StreamTableEnvironment for blink planner)
-* `bt_env` (BatchTableEnvironment for blink planner)
-* `st_env_2` (StreamTableEnvironment for flink planner)
-* `bt_env_2` (BatchTableEnvironment for flink planner)
+Flink in Zeppelin supports 4 execution modes (`flink.execution.mode`):
-## Blink/Flink Planner
+* Local
+* Remote
+* Yarn
+* Yarn Application
-There are 2 planners supported by Flink's table api: `flink` & `blink`.
+### Local Mode
-* If you want to use DataSet api, and convert it to flink table then please use flink planner (`btenv_2` and `stenv_2`).
-* In other cases, we would always recommend you to use `blink` planner. This is also what flink batch/streaming sql interpreter use (`%flink.bsql` & `%flink.ssql`)
+Running Flink in local mode will start a MiniCluster in the local JVM. By default, the local MiniCluster uses port 8081, so make sure this port is available on your machine;
+otherwise you can configure `rest.port` to specify another port. You can also specify `local.number-taskmanager` and `flink.tm.slot` to customize the number of TMs and the number of slots per TM,
+because by default there are only 4 TMs with 1 slot each in this MiniCluster, which may not be enough for some cases.
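+
+For example, a minimal inline configuration sketch for local mode (the port and TM numbers below are illustrative, not values you must use):
+
+```
+%flink.conf
+
+flink.execution.mode local
+rest.port 8082
+local.number-taskmanager 4
+flink.tm.slot 4
+```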
-Check this [page](https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/common.html#main-differences-between-the-two-planners) for the difference between flink planner and blink planner.
+### Remote Mode
+Running Flink in remote mode will connect to an existing Flink cluster, which could be a standalone cluster or a yarn session cluster. Besides setting `flink.execution.mode` to `remote`, you also need to specify
+`flink.execution.remote.host` and `flink.execution.remote.port` to point to the Flink JobManager's rest api address.
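+
+For example, a sketch of the inline configuration for remote mode (the host and port below are placeholders for your own cluster):
+
+```
+%flink.conf
+
+flink.execution.mode remote
+flink.execution.remote.host jobmanager-host
+flink.execution.remote.port 8081
+```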
-## Execution mode (Local/Remote/Yarn/Yarn Application)
+### Yarn Mode
-Flink in Zeppelin supports 4 execution modes (`flink.execution.mode`):
+In order to run Flink in Yarn mode, you need to make the following settings:
-* Local
-* Remote
-* Yarn
-* Yarn Application
+* Set `flink.execution.mode` to `yarn`
+* Set `HADOOP_CONF_DIR` in Flink's interpreter setting or `zeppelin-env.sh`.
+* Make sure the `hadoop` command is on your `PATH`, because internally Flink will run the command `hadoop classpath` and load all the hadoop related jars into the Flink interpreter process
-### Run Flink in Local Mode
+In this mode, Zeppelin would launch a Flink yarn session cluster for you and destroy it when you shut down your Flink interpreter.
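+
+A minimal sketch via inline configuration (the `HADOOP_CONF_DIR` path is illustrative, and it could equally be set in the interpreter setting or `zeppelin-env.sh`):
+
+```
+%flink.conf
+
+flink.execution.mode yarn
+HADOOP_CONF_DIR /etc/hadoop/conf
+```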
-Running Flink in Local mode will start a MiniCluster in local JVM. By default, the local MiniCluster will use port 8081, so make sure this port is available in your machine,
-otherwise you can configure `rest.port` to specify another port. You can also specify `local.number-taskmanager` and `flink.tm.slot` to customize the number of TM and number of slots per TM,
-because by default it is only 4 TM with 1 Slots which may not be enough for some cases.
+### Yarn Application Mode
-### Run Flink in Remote Mode
+In the above yarn mode, there will be a separate Flink interpreter process on the Zeppelin server host. However, this may run out of resources when there are too many interpreter processes.
+So in practice, we would recommend you to use yarn application mode if you are using Flink 1.11 or later (yarn application mode is only supported since Flink 1.11).
+In this mode the Flink interpreter runs in the JobManager, which runs in a yarn container.
+In order to run Flink in yarn application mode, you need to make the following settings (a configuration sketch follows the list):
-Running Flink in remote mode will connect to an existing flink cluster which could be standalone cluster or yarn session cluster. Besides specifying `flink.execution.mode` to be `remote`. You also need to specify
-`flink.execution.remote.host` and `flink.execution.remote.port` to point to flink job manager.
+* Set `flink.execution.mode` to `yarn-application`
+* Set `HADOOP_CONF_DIR` in Flink's interpreter setting or `zeppelin-env.sh`.
+* Make sure the `hadoop` command is on your `PATH`, because internally Flink will run the command `hadoop classpath` and load all the hadoop related jars into the Flink interpreter process
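+
+A sketch of the corresponding inline configuration (only the execution mode differs from the yarn mode above):
+
+```
+%flink.conf
+
+flink.execution.mode yarn-application
+```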
-### Run Flink in Yarn Mode
-In order to run flink in Yarn mode, you need to make the following settings:
+## Flink Scala
-* Set `flink.execution.mode` to `yarn`
-* Set `HADOOP_CONF_DIR` in flink's interpreter setting or `zeppelin-env.sh`.
-* Make sure `hadoop` command is on your PATH. Because internally flink will call command `hadoop classpath` and load all the hadoop related jars in the flink interpreter process
+Scala is the default language of Flink on Zeppelin (`%flink`), and it is also the entry point of the Flink interpreter. Underneath, the Flink interpreter will create a Scala shell
+which creates several built-in variables, including ExecutionEnvironment, StreamExecutionEnvironment and so on.
+So don't create these Flink environment variables again, otherwise you might hit weird issues. The Scala code you write in Zeppelin will be submitted to this Scala shell.
+Here are the built-in variables created in the Flink Scala shell:
-### Run Flink in Yarn Application Mode
+* senv (StreamExecutionEnvironment),
+* benv (ExecutionEnvironment)
+* stenv (StreamTableEnvironment for blink planner (aka. new planner))
+* btenv (BatchTableEnvironment for blink planner (aka. new planner))
+* z (ZeppelinContext)
-In the above yarn mode, there will be a separated flink interpreter process. This may run out of resources when there're many interpreter processes.
-So it is recommended to use yarn application mode if you are using flink 1.11 or afterwards (yarn application mode is only supported after flink 1.11). In this mode flink interpreter runs in the JobManager which is in yarn container.
-In order to run flink in yarn application mode, you need to make the following settings:
+### Blink/Flink Planner
-* Set `flink.execution.mode` to `yarn-application`
-* Set `HADOOP_CONF_DIR` in flink's interpreter setting or `zeppelin-env.sh`.
-* Make sure `hadoop` command is on your PATH. Because internally flink will call command `hadoop classpath` and load all the hadoop related jars in the flink interpreter process
+Since Zeppelin 0.11, we have removed support for the flink planner (aka. the old planner), which was also removed in Flink 1.14.
+### Stream WordCount Example
-## How to use Hive
+You can write whatever Scala code you like in Zeppelin.
-In order to use Hive in Flink, you have to make the following setting.
+e.g. in the following example, we write a classic streaming wordcount (a minimal sketch is shown below).
-* Set `zeppelin.flink.enableHive` to be true
-* Set `zeppelin.flink.hive.version` to be the hive version you are using.
-* Set `HIVE_CONF_DIR` to be the location where `hive-site.xml` is located. Make sure hive metastore is started and you have configured `hive.metastore.uris` in `hive-site.xml`
-* Copy the following dependencies to the lib folder of flink installation.
- * flink-connector-hive_2.11–1.10.0.jar
- * flink-hadoop-compatibility_2.11–1.10.0.jar
- * hive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303–0.9.2.jar and libthrift-0.9.2.jar)
+
+
+
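+A minimal sketch of such a wordcount, using the built-in `senv` variable (the sample input below is made up for illustration):
+
+```scala
+%flink
+
+// a classic streaming wordcount over a small in-memory dataset
+val data = senv.fromElements("hello world", "hello flink", "hello hadoop")
+data.flatMap(line => line.split("\\s"))
+  .map(w => (w, 1))
+  .keyBy(0)   // key by the word
+  .sum(1)     // sum the counts per word
+  .print()
+
+senv.execute("Streaming WordCount")
+```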
+### Code Completion
+
+You can type tab for code completion.
+
+
+
+### ZeppelinContext
+
+`ZeppelinContext` provides some additional functions and utilities.
+See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details.
+For the Flink interpreter, you can use `z` to display a Flink `DataSet/Table`.
+
+e.g. you can use `z.show` to display a DataSet, Batch Table, or Stream Table (see the sketch after this list).
+
+* z.show(DataSet)
+
+
+
+
+* z.show(Batch Table)
+
+
+
+
+* z.show(Stream Table)
+
+
+
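+A minimal sketch of `z.show` on a DataSet (the sample data is made up for illustration):
+
+```scala
+%flink
+
+// build a tiny DataSet and render it with Zeppelin's table visualization
+val ds = benv.fromElements((1, "jeff"), (2, "andy"), (3, "james"))
+z.show(ds)
+```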
+
+## Flink SQL
+
+In Zeppelin, there are 2 kinds of Flink sql interpreters you can use:
+
+* `%flink.ssql`
+Streaming sql interpreter which launches Flink streaming jobs via `StreamTableEnvironment`
+* `%flink.bsql`
+Batch sql interpreter which launches Flink batch jobs via `BatchTableEnvironment`
+
+The Flink sql interpreter in Zeppelin is equivalent to the Flink Sql-client plus many other enhancement features.
+
+### Enhancement SQL Features
+
+#### Support batch sql and streaming sql together
-## Flink Batch SQL
+In the Flink Sql-client, you can run either streaming sql or batch sql in one session; you can not run them together.
+But in Zeppelin, you can. `%flink.ssql` is used for running streaming sql, while `%flink.bsql` is used for running batch sql.
+Batch and streaming Flink jobs run in the same Flink session cluster.
-`%flink.bsql` is used for flink's batch sql. You can type `help` to get all the available commands.
-It supports all the flink sql, including DML/DDL/DQL.
+#### Support multiple statements
-* Use `insert into` statement for batch ETL
-* Use `select` statement for batch data analytics
+You can write multiple sql statements in one paragraph, each sql statement separated by a semicolon, as in the sketch below.
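+
+The table name, path and format below are illustrative only:
+
+```sql
+%flink.bsql
+
+DROP TABLE IF EXISTS my_table;
+
+CREATE TABLE my_table (
+  id INT,
+  name STRING
+) WITH (
+  'connector' = 'filesystem',
+  'path' = '/tmp/my_table',
+  'format' = 'csv'
+);
+
+DESCRIBE my_table;
+```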
-## Flink Streaming SQL
+#### Comment support
-`%flink.ssql` is used for flink's streaming sql. You just type `help` to get all the available commands.
-It supports all the flink sql, including DML/DDL/DQL.
+2 kinds of sql comments are supported in Zeppelin:
-* Use `insert into` statement for streaming ETL
-* Use `select` statement for streaming data analytics
+* Single line comments start with `--`
+* Multi-line comments are wrapped in `/* */`
-## Streaming Data Visualization
+
+
+
+#### Job parallelism setting
+
+You can set the sql parallelism via the paragraph local property `parallelism`, as in the sketch below.
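+
+The table and column names here are placeholders:
+
+```sql
+%flink.ssql(parallelism=2)
+
+SELECT name, count(1) AS cnt
+FROM source_table
+GROUP BY name;
+```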
+
+
+
+#### Support multiple insert
+
+Sometimes you have multiple insert statements which read the same source
+but write to different sinks. By default, each insert statement would launch a separate Flink job,
+but you can set the paragraph local property `runAsOne` to `true` to run them in one single Flink job, as in the sketch below.
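+
+The table names are placeholders:
+
+```sql
+%flink.ssql(runAsOne=true)
+
+INSERT INTO sink_table_1 SELECT * FROM source_table;
+INSERT INTO sink_table_2 SELECT * FROM source_table;
+```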
+
+
+
+#### Set job name
+
+You can set the Flink job name for an insert statement via the paragraph local property `jobName`. To be noticed,
+you can only set the job name for insert statements; select statements are not supported yet.
+This setting also only works for a single insert statement; it doesn't work for the multiple-insert case mentioned above. A sketch follows.
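+
+The table names are placeholders:
+
+```sql
+%flink.ssql(jobName=my_job)
+
+INSERT INTO sink_table SELECT * FROM source_table;
+```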
+
+
+
+### Streaming Data Visualization
+
+Zeppelin can visualize the select sql result of a Flink streaming job. Overall it supports 3 modes:
-Zeppelin supports 3 types of streaming data analytics:
* Single
* Update
* Append
-### type=single
-Single mode is for the case when the result of sql statement is always one row, such as the following example. The output format is HTML,
+#### Single Mode
+
+Single mode is for the case when the result of the sql statement is always one row,
+such as the following example. The output format is HTML,
and you can specify paragraph local property `template` for the final output content template.
-And you can use `{i}` as placeholder for the ith column of result.
+You can use `{i}` as a placeholder for the i-th column of the result, as in the sketch below.
+
+
+
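+A sketch of single mode (the table name, column names and template text are illustrative):
+
+```sql
+%flink.ssql(type=single, parallelism=1, template=Total count is <h1>{1}</h1> until <h2>{0}</h2>)
+
+SELECT max(event_ts), count(1) FROM source_table;
+```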
-
+#### Update Mode
-### type=update
-Update mode is suitable for the case when the output is more than one rows, and always will be updated continuously.
+Update mode is suitable for the case when the output is more than one row and will be updated continuously.
Here’s one example where we use group by.
-
+
+
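+A sketch of update mode with a group by (the table and column names are placeholders):
+
+```sql
+%flink.ssql(type=update)
+
+SELECT url, count(1) AS pv FROM source_table GROUP BY url;
+```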
+#### Append Mode
+
+Append mode is suitable for the scenario where output data is always appended,
+e.g. the following example which uses a tumble window.
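+
+A sketch of append mode (the table and column names are placeholders; `event_ts` is assumed to be a rowtime attribute):
+
+```sql
+%flink.ssql(type=append)
+
+SELECT TUMBLE_START(event_ts, INTERVAL '5' SECOND) AS start_time, url, count(1) AS pv
+FROM source_table
+GROUP BY TUMBLE(event_ts, INTERVAL '5' SECOND), url;
+```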
+
+
+
+## PyFlink
+
+PyFlink is the Python entry point of Flink on Zeppelin. Internally, the Flink interpreter will create a Python shell which
+creates Flink's environment variables (including ExecutionEnvironment, StreamExecutionEnvironment and so on).
+To be noticed, the java environment behind PyFlink is created in the Scala shell.
+That means underneath the Scala shell and the Python shell share the same environment.
+These are the variables created in the Python shell:
+
+* `s_env` (StreamExecutionEnvironment),
+* `b_env` (ExecutionEnvironment)
+* `st_env` (StreamTableEnvironment for blink planner (aka. new planner))
+* `bt_env` (BatchTableEnvironment for blink planner (aka. new planner))
+
+
+### Configure PyFlink
+
+There are 3 things you need to configure to make PyFlink work in Zeppelin.
+
+* Install pyflink,
+  e.g. ( pip install apache-flink==1.11.1 ).
+  If you need to use PyFlink udfs, then you need to install pyflink on all the task manager nodes. That means if you are using yarn, then all the yarn nodes need to have pyflink installed.
+* Copy the `python` folder under `${FLINK_HOME}/opt` to `${FLINK_HOME}/lib`.
+* Set `zeppelin.pyflink.python` to the python executable path. By default, it is the python in `PATH`. In case you have multiple versions of python installed, you need to configure `zeppelin.pyflink.python` as the python version you want to use.
+
+### How to use PyFlink
+
+There are 2 ways to use PyFlink in Zeppelin
+
+* `%flink.pyflink`
+* `%flink.ipyflink`
+
+`%flink.pyflink` is simpler and easier; you don't need to do anything except the above settings,
+but its functionality is also limited. We suggest you use `%flink.ipyflink`, which provides almost the same user experience as jupyter.
+
+### Configure IPyFlink
+
+If you don't have anaconda installed, then you need to install the following 3 libraries.
+
+```
+pip install jupyter
+pip install grpcio
+pip install protobuf
+```
+
+If you have anaconda installed, then you only need to install the following 2 libraries.
+
+```
+pip install grpcio
+pip install protobuf
+```
+
+`ZeppelinContext` is also available in PyFlink; you can use it in almost the same way as in Flink Scala.
+
+Check the [Python doc](python.html) for more features of IPython.
+
+
+## Third party dependencies
+
+It is very common to have third party dependencies when you write Flink jobs in any language (Scala, Python, Sql).
+It is very easy to add dependencies in an IDE (e.g. add a dependency in pom.xml),
+but how can you do that in Zeppelin? Mainly there are 2 settings you can use to add third party dependencies:
+
+* flink.execution.packages
+* flink.execution.jars
+
+### flink.execution.packages
+
+This is the recommended way of adding dependencies. Its implementation is the same as adding
+dependencies in `pom.xml`. Underneath, it would download all the packages and their transitive dependencies
+from the maven repository, then put them on the classpath. Here's one example of how to add the kafka connector of Flink 1.10 via [inline configuration](../usage/interpreter/overview.html#inline-generic-configuration).
+
+```
+%flink.conf
+
+flink.execution.packages org.apache.flink:flink-connector-kafka_2.11:1.10.0,org.apache.flink:flink-connector-kafka-base_2.11:1.10.0,org.apache.flink:flink-json:1.10.0
+```
+
+The format is `artifactGroup:artifactId:version`; if you have multiple packages,
+then separate them with commas. `flink.execution.packages` requires internet access.
+So if you can not access the internet, you need to use `flink.execution.jars` instead.
+
+### flink.execution.jars
+
+If your Zeppelin machine can not access the internet or your dependencies are not deployed to a maven repository,
+then you can use `flink.execution.jars` to specify the jar files you depend on (jar files are separated with commas).
+
+Here's one example of how to add kafka dependencies (including the kafka connector and its transitive dependencies) via `flink.execution.jars`:
+
+```
+%flink.conf
+
+flink.execution.jars /usr/lib/flink-kafka/target/flink-kafka-1.0-SNAPSHOT.jar
+```
-### type=append
-Append mode is suitable for the scenario where output data is always appended. E.g. the following example which use tumble window.
-
-
## Flink UDF
-You can use Flink scala UDF or Python UDF in sql. UDF for batch and streaming sql is the same. Here're 2 examples.
+There are 4 ways you can define UDF in Zeppelin.
+
+* Write Scala UDF
+* Write PyFlink UDF
+* Create UDF via SQL
+* Configure udf jar via flink.udf.jars
-* Scala UDF
+### Scala UDF
```scala
%flink
@@ -370,11 +635,16 @@ You can use Flink scala UDF or Python UDF in sql. UDF for batch and streaming sq
class ScalaUpper extends ScalarFunction {
def eval(str: String) = str.toUpperCase
}
-btenv.registerFunction("scala_upper", new ScalaUpper())
+btenv.registerFunction("scala_upper", new ScalaUpper())
```
-* Python UDF
+It is very straightforward to define a Scala udf, almost the same as what you do in an IDE.
+After creating the udf class, you need to register it via `btenv`.
+You can also register it via `stenv`, which shares the same Catalog with `btenv`.
+
+
+### Python UDF
```python
@@ -387,54 +657,78 @@ class PythonUpper(ScalarFunction):
bt_env.register_function("python_upper", udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))
```
+It is also very straightforward to define a Python udf, almost the same as what you do in an IDE.
+After creating the udf class, you need to register it via `bt_env`.
+You can also register it via `st_env`, which shares the same Catalog with `bt_env`.
-Zeppelin only supports scala and python for flink interpreter, if you want to write a java udf or the udf is pretty complicated which make it not suitable to write in Zeppelin,
-then you can write the udf in IDE and build an udf jar.
-In Zeppelin you just need to specify `flink.udf.jars` to this jar, and flink
-interpreter will detect all the udfs in this jar and register all the udfs to TableEnvironment, the udf name is the class name.
+### UDF via SQL
-## PyFlink(%flink.pyflink)
-In order to use PyFlink in Zeppelin, you just need to do the following configuration.
-* Install apache-flink (e.g. pip install apache-flink)
-* Set `zeppelin.pyflink.python` to the python executable where apache-flink is installed in case you have multiple python installed.
-* Copy flink-python_2.11–1.10.0.jar from flink opt folder to flink lib folder
+Some simple udfs can be written in Zeppelin. But if the udf logic is very complicated,
+then it is better to write it in an IDE and register it in Zeppelin as follows:
-And PyFlink will create 6 variables for you:
+```sql
+%flink.ssql
+
+CREATE FUNCTION myupper AS 'org.apache.zeppelin.flink.udf.JavaUpper';
+```
+
+But this kind of approach requires the udf jar to be on the `CLASSPATH`,
+so you need to configure `flink.execution.jars` to include this udf jar on the `CLASSPATH`, such as the following:
+
+```
+%flink.conf
+
+flink.execution.jars /usr/lib/flink-udf-1.0-SNAPSHOT.jar
+```
+
+### flink.udf.jars
+
+The above 3 approaches all have some limitations:
+
+* It is suitable to write simple Scala or Python udfs in Zeppelin, but not very complicated udfs, because a notebook doesn't provide advanced features compared to an IDE, such as package management, code navigation and so on.
+* It is not easy to share udfs between notes or users; you have to run the udf-defining paragraph in each Flink interpreter.
+
+So when you have many udfs, or the udf logic is very complicated and you don't want to register them by yourself every time, you can use `flink.udf.jars`:
+
+* Step 1. Create a udf project in your IDE, write your udf there.
+* Step 2. Set `flink.udf.jars` to point to the udf jar you build from your udf project
+
+For example,
+
+```
+%flink.conf
+
+flink.udf.jars /usr/lib/flink-udf-1.0-SNAPSHOT.jar
+```
+
+Zeppelin would scan this jar, find all the udf classes and then register them automatically for you.
+The udf name is the class name. For example, here's the output of show functions after specifying the above udf jar in `flink.udf.jars`.
+
+
+
+By default, Zeppelin would scan all the classes in this jar,
+so it could be pretty slow if your jar is very big, especially when your udf jar has other dependencies.
+In this case we would recommend you to set `flink.udf.jars.packages` to specify the packages to scan;
+this can reduce the number of classes to scan and make udf detection much faster.
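+
+For example, a sketch combining both settings (the jar path and package name are illustrative):
+
+```
+%flink.conf
+
+flink.udf.jars /usr/lib/flink-udf-1.0-SNAPSHOT.jar
+flink.udf.jars.packages org.example.udf
+```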
+
+
+## How to use Hive
+
+In order to use Hive in Flink, you have to make the following settings.
+
+* Set `zeppelin.flink.enableHive` to be true
+* Set `zeppelin.flink.hive.version` to be the hive version you are using.
+* Set `HIVE_CONF_DIR` to be the location where `hive-site.xml` is located. Make sure hive metastore is started and you have configured `hive.metastore.uris` in `hive-site.xml`
+* Copy the following dependencies to the lib folder of the Flink installation.
+  * flink-connector-hive_2.11-*.jar
+  * flink-hadoop-compatibility_2.11-*.jar
+  * hive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303-0.9.2.jar and libthrift-0.9.2.jar)
-* `s_env` (StreamExecutionEnvironment),
-* `b_env` (ExecutionEnvironment)
-* `st_env` (StreamTableEnvironment for blink planner)
-* `bt_env` (BatchTableEnvironment for blink planner)
-* `st_env_2` (StreamTableEnvironment for flink planner)
-* `bt_env_2` (BatchTableEnvironment for flink planner)
-
-### IPython Support(%flink.ipyflink)
-
-By default, zeppelin would use IPython in `%flink.pyflink` when IPython is available, Otherwise it would fall back to the original python implementation.
-For the IPython features, you can refer doc[Python Interpreter](python.html)
-
-## ZeppelinContext
-Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities.
-See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. You can use `z` to display both flink DataSet and batch/stream table.
-
-* Display DataSet
-
## Paragraph local properties
In the section of `Streaming Data Visualization`, we demonstrate the different visualization type via paragraph local properties: `type`.
-In this section, we will list and explain all the supported local properties in flink interpreter.
+In this section, we will list and explain all the supported local properties in the Flink interpreter.
@@ -498,5 +792,8 @@ In this section, we will list and explain all the supported local properties in
Zeppelin is shipped with several Flink tutorial notes which may be helpful for you. You can check for more features in the tutorial notes.
+## Community
+
+[Join our community](http://zeppelin.apache.org/community.html) to discuss with others.
diff --git a/docs/interpreter/geode.md b/docs/interpreter/geode.md
deleted file mode 100644
index 436c308c5c1..00000000000
--- a/docs/interpreter/geode.md
+++ /dev/null
@@ -1,218 +0,0 @@
----
-layout: page
-title: "Geode/Gemfire OQL Interpreter for Apache Zeppelin"
-description: "Apache Geode (incubating) provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Geode/Gemfire OQL Interpreter for Apache Zeppelin
-
-
-
-## Overview
-
-
-
Name
-
Class
-
Description
-
-
-
%geode.oql
-
GeodeOqlInterpreter
-
Provides OQL environment for Apache Geode
-
-
-
-This interpreter supports the [Geode](http://geode.incubator.apache.org/) [Object Query Language (OQL)](http://geode-docs.cfapps.io/docs/developing/querying_basics/oql_compared_to_sql.html).
-With the OQL-based querying language:
-
-[](https://www.youtube.com/watch?v=zvzzA9GXu3Q)
-
-* You can query on any arbitrary object
-* You can navigate object collections
-* You can invoke methods and access the behavior of objects
-* Data mapping is supported
-* You are not required to declare types. Since you do not need type definitions, you can work across multiple languages
-* You are not constrained by a schema
-
-This [Video Tutorial](https://www.youtube.com/watch?v=zvzzA9GXu3Q) illustrates some of the features provided by the `Geode Interpreter`.
-
-## Create Interpreter
-By default Zeppelin creates one `Geode/OQL` instance. You can remove it or create more instances.
-
-Multiple Geode instances can be created, each configured to the same or different backend Geode cluster.
-But over time a `Notebook` can have only one Geode interpreter instance `bound`.
-That means you _cannot_ connect to different Geode clusters in the same `Notebook`.
-This is a known Zeppelin limitation.
-
-To create new Geode instance open the `Interpreter` section and click the `+Create` button.
-Pick a `Name` of your choice and from the `Interpreter` drop-down select `geode`.
-Then follow the configuration instructions and `Save` the new instance.
-
-> Note: The `Name` of the instance is used only to distinguish the instances while binding them to the `Notebook`. The `Name` is irrelevant inside the `Notebook`. In the `Notebook` you must use `%geode.oql` tag.
-
-## Bind to Notebook
-In the `Notebook` click on the `settings` icon in the top right corner.
-The select/deselect the interpreters to be bound with the `Notebook`.
-
-## Configuration
-You can modify the configuration of the Geode from the `Interpreter` section.
-The Geode interpreter expresses the following properties:
-
-
-
-
Property Name
-
Description
-
Default Value
-
-
-
geode.locator.host
-
The Geode Locator Host
-
localhost
-
-
-
geode.locator.port
-
The Geode Locator Port
-
10334
-
-
-
geode.max.result
-
Max number of OQL result to display to prevent the browser overload
-
1000
-
-
-
-## How to use
-> *Tip 1: Use (CTRL + .) for OQL auto-completion.*
-
-> *Tip 2: Always start the paragraphs with the full `%geode.oql` prefix tag! The short notation: `%geode` would still be able run the OQL queries but the syntax highlighting and the auto-completions will be disabled.*
-
-### Create / Destroy Regions
-
-The OQL specification does not support [Geode Regions](https://cwiki.apache.org/confluence/display/GEODE/Index#Index-MainConceptsandComponents) mutation operations.
-To `create`/`destroy` regions one should use the [GFSH](http://geode-docs.cfapps.io/docs/tools_modules/gfsh/chapter_overview.html) shell tool instead.
-In the following it is assumed that the GFSH is colocated with Zeppelin server.
-
-```bash
-%sh
-source /etc/geode/conf/geode-env.sh
-gfsh << EOF
-
- connect --locator=ambari.localdomain[10334]
-
- destroy region --name=/regionEmployee
- destroy region --name=/regionCompany
- create region --name=regionEmployee --type=REPLICATE
- create region --name=regionCompany --type=REPLICATE
-
- exit;
-EOF
-```
-
-Above snippet re-creates two regions: `regionEmployee` and `regionCompany`.
-Note that you have to explicitly specify the locator host and port.
-The values should match those you have used in the Geode Interpreter configuration.
-Comprehensive list of [GFSH Commands by Functional Area](http://geode-docs.cfapps.io/docs/tools_modules/gfsh/gfsh_quick_reference.html).
-
-### Basic OQL
-```sql
-%geode.oql
-SELECT count(*) FROM /regionEmployee
-```
-
-OQL `IN` and `SET` filters:
-
-```sql
-%geode.oql
-SELECT * FROM /regionEmployee
-WHERE companyId IN SET(2) OR lastName IN SET('Tzolov13', 'Tzolov73')
-```
-
-OQL `JOIN` operations
-
-```sql
-%geode.oql
-SELECT e.employeeId, e.firstName, e.lastName, c.id as companyId, c.companyName, c.address
-FROM /regionEmployee e, /regionCompany c
-WHERE e.companyId = c.id
-```
-
-By default the QOL responses contain only the region entry values. To access the keys, query the `EntrySet` instead:
-
-```sql
-%geode.oql
-SELECT e.key, e.value.companyId, e.value.email
-FROM /regionEmployee.entrySet e
-```
-Following query will return the EntrySet value as a Blob:
-
-```sql
-%geode.oql
-SELECT e.key, e.value FROM /regionEmployee.entrySet e
-```
-
-> Note: You can have multiple queries in the same paragraph but only the result from the first is displayed. [[1](https://issues.apache.org/jira/browse/ZEPPELIN-178)], [[2](https://issues.apache.org/jira/browse/ZEPPELIN-212)].
-
-### GFSH Commands From The Shell
-Use the Shell Interpreter (`%sh`) to run OQL commands form the command line:
-
-```bash
-%sh
-source /etc/geode/conf/geode-env.sh
-gfsh -e "connect" -e "list members"
-```
-
-### Apply Zeppelin Dynamic Forms
-You can leverage [Zeppelin Dynamic Form](../usage/dynamic_form/intro.html) inside your OQL queries. You can use both the `text input` and `select form` parameterization features
-
-```sql
-%geode.oql
-SELECT * FROM /regionEmployee e WHERE e.employeeId > ${Id}
-```
-
-### Auto-completion
-The Geode Interpreter provides a basic auto-completion functionality. On `(Ctrl+.)` it list the most relevant suggestions in a pop-up window.
-
-## Geode REST API
-To list the defined regions you can use the [Geode REST API](http://geode-docs.cfapps.io/docs/geode_rest/chapter_overview.html):
-
-```
-http://phd1.localdomain:8484/gemfire-api/v1/
-```
-
-```json
-{
- "regions" : [{
- "name" : "regionEmployee",
- "type" : "REPLICATE",
- "key-constraint" : null,
- "value-constraint" : null
- }, {
- "name" : "regionCompany",
- "type" : "REPLICATE",
- "key-constraint" : null,
- "value-constraint" : null
- }]
-}
-```
-
-> To enable Geode REST API with JSON support add the following properties to geode.server.properties.file and restart:
-
-```
-http-service-port=8484
-start-dev-rest-api=true
-```
diff --git a/docs/interpreter/hazelcastjet.md b/docs/interpreter/hazelcastjet.md
deleted file mode 100644
index 06ebc888ac9..00000000000
--- a/docs/interpreter/hazelcastjet.md
+++ /dev/null
@@ -1,143 +0,0 @@
----
-layout: page
-title: Hazelcast Jet interpreter in Apache Zeppelin
-description: Build and execture Hazelcast Jet computation jobs.
-group: interpreter
----
-
-
-{% include JB/setup %}
-
-# Hazelcast Jet interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Hazelcast Jet](https://jet.hazelcast.org) is an open source application embeddable, distributed computing engine for In-Memory Streaming and Fast Batch Processing built on top of Hazelcast In-Memory Data Grid (IMDG).
-With Hazelcast IMDG providing storage functionality, Hazelcast Jet performs parallel execution to enable data-intensive applications to operate in near real-time.
-
-## Why Hazelcast Jet?
-There are plenty of solutions which can solve some of these issues, so why choose Hazelcast Jet?
-When speed and simplicity is important.
-
-Hazelcast Jet gives you all the infrastructure you need to build a distributed data processing pipeline within one 10Mb Java JAR: processing, storage and clustering.
-
-As it is built on top of Hazelcast IMDG, Hazelcast Jet comes with in-memory operational storage that’s available out-of-the box. This storage is partitioned, distributed and replicated across the Hazelcast Jet cluster for capacity and resiliency. It can be used as an input data buffer, to publish the results of a Hazelcast Jet computation, to connect multiple Hazelcast Jet jobs or as a lookup cache for data enrichment.
-
-## How to use the Hazelcast Jet interpreter
-Basically, you can write normal java code. You should write the main method inside a class because the interpreter invoke this main to execute the code. Unlike Zeppelin normal pattern, each paragraph is considered as a separate job, there isn't any relation to any other paragraph. For example, a variable defined in one paragraph cannot be used in another one as each paragraph is a self contained java main class that is executed and the output returned to Zeppelin.
-
-The following is a demonstration of a word count example with the result represented as an Hazelcast IMDG IMap sink and displayed leveraging Zeppelin's built in visualization using the utility method `JavaInterpreterUtils.displayTableFromSimpleMap`.
-
-```java
-%hazelcastjet
-
-import com.hazelcast.jet.Jet;
-import com.hazelcast.jet.JetInstance;
-import com.hazelcast.jet.core.DAG;
-import com.hazelcast.jet.pipeline.Pipeline;
-import com.hazelcast.jet.pipeline.Sinks;
-import com.hazelcast.jet.pipeline.Sources;
-
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-import org.apache.zeppelin.java.JavaInterpreterUtils;
-
-import static com.hazelcast.jet.Traversers.traverseArray;
-import static com.hazelcast.jet.aggregate.AggregateOperations.counting;
-import static com.hazelcast.jet.function.DistributedFunctions.wholeItem;
-
-public class DisplayTableFromSimpleMapExample {
-
- public static void main(String[] args) {
-
- // Create the specification of the computation pipeline. Note
- // it's a pure POJO: no instance of Jet needed to create it.
- Pipeline p = Pipeline.create();
- p.drawFrom(Sources.list("text"))
- .flatMap(word ->
- traverseArray(word.toLowerCase().split("\\W+")))
- .filter(word -> !word.isEmpty())
- .groupingKey(wholeItem())
- .aggregate(counting())
- .drainTo(Sinks.map("counts"));
-
- // Start Jet, populate the input list
- JetInstance jet = Jet.newJetInstance();
- try {
- List text = jet.getList("text");
- text.add("hello world hello hello world");
- text.add("world world hello world");
-
- // Perform the computation
- jet.newJob(p).join();
-
- // Diplay the results with Zeppelin %table
- Map counts = jet.getMap("counts");
- System.out.println(JavaInterpreterUtils.displayTableFromSimpleMap("Word","Count", counts));
-
- } finally {
- Jet.shutdownAll();
- }
-
- }
-
-}
-```
-
-The following is a demonstration where the Hazelcast DAG (directed acyclic graph) is displayed as a graph leveraging Zeppelin's built in visualization using the utility method `HazelcastJetInterpreterUtils.displayNetworkFromDAG`.
-This is particularly useful to understand how the high level Pipeline is then converted to the Jet’s low-level Core API.
-
-```java
-%hazelcastjet
-
-import com.hazelcast.jet.pipeline.Pipeline;
-import com.hazelcast.jet.pipeline.Sinks;
-import com.hazelcast.jet.pipeline.Sources;
-
-import org.apache.zeppelin.hazelcastjet.HazelcastJetInterpreterUtils;
-
-import static com.hazelcast.jet.Traversers.traverseArray;
-import static com.hazelcast.jet.aggregate.AggregateOperations.counting;
-import static com.hazelcast.jet.function.DistributedFunctions.wholeItem;
-
-public class DisplayNetworkFromDAGExample {
-
- public static void main(String[] args) {
-
- // Create the specification of the computation pipeline. Note
- // it's a pure POJO: no instance of Jet needed to create it.
- Pipeline p = Pipeline.create();
- p.drawFrom(Sources.list("text"))
- .flatMap(word ->
- traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
- .filter(word -> !word.isEmpty())
- .groupingKey(wholeItem())
- .aggregate(counting())
- .drainTo(Sinks.map("counts"));
-
- // Diplay the results with Zeppelin %network
- System.out.println(HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag()));
-
- }
-
-}
-```
-
-Note
-- By clicking on a node of the graph, the node type is displayed (either Source, Sink or Transform). This is also visually represented with colors (Sources and Sinks are blue, Transforms are orange).
-- By clicking on an edge of the graph, the following details are shown: routing (UNICAST, PARTITIONED, ISOLATED, BROADCAST), distributed (true or false), priority (int).
diff --git a/docs/interpreter/hbase.md b/docs/interpreter/hbase.md
index fd6334acebc..50228407c92 100644
--- a/docs/interpreter/hbase.md
+++ b/docs/interpreter/hbase.md
@@ -32,14 +32,14 @@ By default, Zeppelin is built against HBase 1.0.x releases. To work with HBase 1
```bash
# HBase 1.1.4
-mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0
+./mvnw clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0
```
To work with HBase 1.2.0+, use the following build command:
```bash
# HBase 1.2.0
-mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0
+./mvnw clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0
```
## Configuration
diff --git a/docs/interpreter/hive.md b/docs/interpreter/hive.md
index 86602fcc27f..94e49ce6a4d 100644
--- a/docs/interpreter/hive.md
+++ b/docs/interpreter/hive.md
@@ -25,7 +25,7 @@ limitations under the License.
## Important Notice
-Hive Interpreter will be deprecated and merged into JDBC Interpreter.
+Hive Interpreter has been deprecated and merged into JDBC Interpreter.
You can use Hive Interpreter by using JDBC Interpreter with same functionality.
See the example below of settings and dependencies.
@@ -36,19 +36,19 @@ See the example below of settings and dependencies.
Value
-
hive.driver
+
default.driver
org.apache.hive.jdbc.HiveDriver
-
hive.url
+
default.url
jdbc:hive2://localhost:10000
-
hive.user
+
default.user
hiveUser
-
hive.password
+
default.password
hivePassword
@@ -103,34 +103,22 @@ See the example below of settings and dependencies.
( Optional ) Other properties used by the driver
-
${prefix}.driver
-
-
Driver class path of %hive(${prefix})
-
-
-
${prefix}.url
-
-
Url of %hive(${prefix})
+
zeppelin.jdbc.hive.timeout.threshold
+
60000
+
Timeout threshold for hive jobs
-
${prefix}.user
-
-
( Optional ) Username of the connection of %hive(${prefix})
+
zeppelin.jdbc.hive.monitor.query_interval
+
1000
+
Query interval for hive statement
-
${prefix}.password
-
-
( Optional ) Password of the connection of %hive(${prefix})
-
-
-
${prefix}.xxx
-
-
( Optional ) Other properties used by the driver of %hive(${prefix})
+
zeppelin.jdbc.hive.engines.tag.enable
+
true
+
Set application tag for applications started by hive engines
-This interpreter provides multiple configuration with `${prefix}`. User can set a multiple connection properties by this prefix. It can be used like `%hive(${prefix})`.
-
## Overview
The [Apache Hive](https://hive.apache.org/) ™ data warehouse software facilitates querying and managing large datasets
@@ -147,14 +135,6 @@ Basically, you can use
select * from my_table;
```
-or
-
-```sql
-%hive(etl)
--- 'etl' is a ${prefix}
-select * from my_table;
-```
-
You can also run multiple queries up to 10 by default. Changing these settings is not implemented yet.
### Apply Zeppelin Dynamic Forms
diff --git a/docs/interpreter/ignite.md b/docs/interpreter/ignite.md
deleted file mode 100644
index 40c56b89f46..00000000000
--- a/docs/interpreter/ignite.md
+++ /dev/null
@@ -1,128 +0,0 @@
----
-layout: page
-title: "Ignite Interpreter for Apache Zeppelin"
-description: "Apache Ignite in-memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Ignite Interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Apache Ignite](https://ignite.apache.org/) In-Memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies.
-
-![Apache Ignite]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-logo.png)
-
-You can use Zeppelin to retrieve distributed data from cache using Ignite SQL interpreter. Moreover, Ignite interpreter allows you to execute any Scala code in cases when SQL doesn't fit to your requirements. For example, you can populate data into your caches or execute distributed computations.
-
-## Installing and Running Ignite example
-In order to use Ignite interpreters, you may install Apache Ignite in some simple steps:
-
-1. Ignite provides examples only with source or binary release. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. The supported Ignite version is specified in [Supported Interpreter table](https://zeppelin.apache.org/supported_interpreters.html#ignite) for each Zeppelin release. If you're using Zeppelin master branch, please see `ignite.version` in `path/to/your-Zeppelin/ignite/pom.xml`.
-2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided `/apache-ignite-fabric-{version}-bin/examples/pom.xml` file into your favourite IDE, such as Eclipse.
-
-* In case of Eclipse, Eclipse -> File -> Import -> Existing Maven Projects
-* Set examples directory path to Eclipse and select the pom.xml.
-* Then start `org.apache.ignite.examples.ExampleNodeStartup` (or whatever you want) to run at least one or more ignite node. When you run example code, you may notice that the number of node is increase one by one.
-
-> **Tip. If you want to run Ignite examples on the cli not IDE, you can export executable Jar file from IDE. Then run it by using below command.**
-
-```bash
-nohup java -jar
-```
-
-## Configuring Ignite Interpreter
-At the "Interpreters" menu, you may edit Ignite interpreter or create new one. Zeppelin provides these properties for Ignite.
-
-
You can connect to the Ignite cluster as client or server node. See Ignite Clients vs. Servers section for details. Use true or false values in order to connect in client or server mode respectively.
-
-
-
ignite.config.url
-
-
Configuration URL. Overrides all other settings.
-
-
-
ignite.jdbc.url
-
jdbc:ignite:cfg://default-ignite-jdbc.xml
-
Ignite JDBC connection URL.
-
-
-
ignite.peerClassLoadingEnabled
-
true
-
Enables peer-class-loading. See Zero Deployment section for details. Use true or false values in order to enable or disable P2P class loading respectively.
-
-
-
-![Configuration of Ignite Interpreter]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png)
-
-## How to use
-After configuring Ignite interpreter, create your own notebook. Then you can bind interpreters like below image.
-
-![Binding Interpreters]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png)
-
-For more interpreter binding information see [here](../usage/interpreter/overview.html#what-is-interpreter-setting).
-
-### Ignite SQL interpreter
-In order to execute SQL query, use ` %ignite.ignitesql ` prefix.
-Supposing you are running `org.apache.ignite.examples.streaming.wordcount.StreamWords`, then you can use "words" cache( Of course you have to specify this cache name to the Ignite interpreter setting section `ignite.jdbc.url` of Zeppelin ).
-For example, you can select top 10 words in the words cache using the following query
-
-```sql
-%ignite.ignitesql
-select _val, count(_val) as cnt from String group by _val order by cnt desc limit 10
-```
-
-![IgniteSql on Zeppelin]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png)
-
-As long as your Ignite version and Zeppelin Ignite version is same, you can also use scala code. Please check the Zeppelin Ignite version before you download your own Ignite.
-
-```scala
-%ignite
-import org.apache.ignite._
-import org.apache.ignite.cache.affinity._
-import org.apache.ignite.cache.query._
-import org.apache.ignite.configuration._
-
-import scala.collection.JavaConversions._
-
-val cache: IgniteCache[AffinityUuid, String] = ignite.cache("words")
-
-val qry = new SqlFieldsQuery("select avg(cnt), min(cnt), max(cnt) from (select count(_val) as cnt from String group by _val)", true)
-
-val res = cache.query(qry).getAll()
-
-collectionAsScalaIterable(res).foreach(println _)
-```
-
-![Using Scala Code]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png)
-
-Apache Ignite also provides a guide docs for Zeppelin ["Ignite with Apache Zeppelin"](https://apacheignite-sql.readme.io/docs/apache-zeppelin)
diff --git a/docs/interpreter/jdbc.md b/docs/interpreter/jdbc.md
index d556160a0da..152117fe75e 100644
--- a/docs/interpreter/jdbc.md
+++ b/docs/interpreter/jdbc.md
@@ -1,7 +1,7 @@
---
layout: page
title: "Generic JDBC Interpreter for Apache Zeppelin"
-description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter."
+description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Presto/Trino, Impala, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter."
group: interpreter
---
-
-{% include JB/setup %}
-
-# Kotlin interpreter for Apache Zeppelin
-
-
-
-## Overview
-Kotlin is a cross-platform, statically typed, general-purpose programming language with type inference.
-It is designed to interoperate fully with Java, and the JVM version of its standard library depends on the Java Class Library, but type inference allows its syntax to be more concise.
-
-## Configuration
-
-
-
-
Name
-
Default
-
Description
-
-
-
-
-
zeppelin.kotlin.maxResult
-
1000
-
Max n
-
-
-
zeppelin.kotlin.shortenTypes
-
true
-
Display shortened types instead of full, e.g. Int vs kotlin.Int
-
-
-
-
-## Example
-
-```kotlin
-%kotlin
-
-fun square(n: Int): Int = n * n
-```
-
-## Kotlin Context
-Kotlin context is accessible via `kc` object bound to the interpreter.
-It holds `vars` and `functions` fields that return all user-defined variables and functions present in the interpreter.
-You can also print variables or functions by calling `kc.showVars()` or `kc.showFunctions()`.
-
-### Example
-
-
-```kotlin
-fun square(n: Int): Int = n * n
-
-val greeter = { s: String -> println("Hello $s!") }
-val l = listOf("Drive", "to", "develop")
-
-kc.showVars()
-kc.showFunctions()
-```
-Output:
-```
-l: List = [Drive, to, develop]
-greeter: (String) -> Unit = (kotlin.String) -> kotlin.Unit
-fun square(Int): Int
-```
diff --git a/docs/interpreter/ksql.md b/docs/interpreter/ksql.md
deleted file mode 100644
index bc91ade6418..00000000000
--- a/docs/interpreter/ksql.md
+++ /dev/null
@@ -1,78 +0,0 @@
----
-layout: page
-title: "KSQL Interpreter for Apache Zeppelin"
-description: "SQL is the streaming SQL engine for Apache Kafka and provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# KSQL Interpreter for Apache Zeppelin
-
-
-
-## Overview
-[KSQL](https://www.confluent.io/product/ksql/) is the streaming SQL engine for Apache Kafka®. It provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka,
-
-## Configuration
-
-
-
-
Property
-
Default
-
Description
-
-
-
-
-
ksql.url
-
http://localhost:8080
-
The KSQL Endpoint base URL
-
-
-
-
-N.b. The interpreter supports all the KSQL properties, i.e. `ksql.streams.auto.offset.reset`.
-The full list of KSQL parameters is [here](https://docs.confluent.io/current/ksql/docs/installation/server-config/config-reference.html).
-
-## Using the KSQL Interpreter
-In a paragraph, use `%ksql` and start your SQL query in order to start to interact with KSQL.
-
-Following some examples:
-
-```
-%ksql
-PRINT 'orders';
-```
-
-![PRINT image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.1.gif)
-
-```
-%ksql
-CREATE STREAM ORDERS WITH
- (VALUE_FORMAT='AVRO',
- KAFKA_TOPIC ='orders');
-```
-
-![CREATE image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.1.gif)
-
-```
-%ksql
-SELECT *
-FROM ORDERS
-LIMIT 10
-```
-
-![LIMIT image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.3.gif)
\ No newline at end of file
diff --git a/docs/interpreter/kylin.md b/docs/interpreter/kylin.md
deleted file mode 100644
index 1f2b0f3ab44..00000000000
--- a/docs/interpreter/kylin.md
+++ /dev/null
@@ -1,82 +0,0 @@
----
-layout: page
-title: "Apache Kylin Interpreter for Apache Zeppelin"
-description: "Apache Kylin™ is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, original contributed from eBay Inc.
-."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Apache Kylin Interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Apache Kylin](https://kylin.apache.org/) is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, original contributed from eBay Inc. The interpreter assumes that Apache Kylin has been installed and you can connect to Apache Kylin from the machine Apache Zeppelin is installed.
-To get start with Apache Kylin, please see [Apache Kylin Quickstart](https://kylin.apache.org/docs15/index.html).
-
-## Configuration
-
-
-
Name
-
Default
-
Description
-
-
-
kylin.api.url
-
http://localhost:7070/kylin/api/query
-
kylin query POST API The format can be like http://<host>:<port>/kylin/api/query
-
-
-
kylin.api.user
-
ADMIN
-
kylin user
-
-
-
kylin.api.password
-
KYLIN
-
kylin password
-
-
-
kylin.query.project
-
learn_kylin
-
String, Project to perform query. Could update at notebook level
-
-
-
kylin.query.ispartial
-
true
-
true|false (@Deprecated since Apache Kylin V1.5) Whether accept a partial result or not, default be “false”. Set to “false” for production use.
-
-
-
kylin.query.limit
-
5000
-
int, Query limit If limit is set in sql, perPage will be ignored.
-
-
-
kylin.query.offset
-
0
-
int, Query offset If offset is set in sql, curIndex will be ignored.
-
-
-
-## Using the Apache Kylin Interpreter
-In a paragraph, use `%kylin(project_name)` to select the **kylin** interpreter, **project name** and then input **sql**. If no project name defined, will use the default project name from the above configuration.
-
-```sql
-%kylin(learn_project)
-select count(*) from kylin_sales group by part_dt
-```
-
diff --git a/docs/interpreter/lens.md b/docs/interpreter/lens.md
deleted file mode 100644
index cd00d1ca769..00000000000
--- a/docs/interpreter/lens.md
+++ /dev/null
@@ -1,188 +0,0 @@
----
-layout: page
-title: "Lens Interpreter for Apache Zeppelin"
-description: "Apache Lens provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Lens Interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Apache Lens](https://lens.apache.org/) provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one.
-
-![Apache Lens]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-logo.png)
-
-## Installing and Running Lens
-In order to use Lens interpreters, you may install Apache Lens in some simple steps:
-
-1. Download Lens for latest version from [the ASF](http://www.apache.org/dyn/closer.lua/lens/2.3-beta). Or the older release can be found [in the Archives](http://archive.apache.org/dist/lens/).
-2. Before running Lens, you have to set HIVE_HOME and HADOOP_HOME. If you want to get more information about this, please refer to [here](http://lens.apache.org/lenshome/install-and-run.html#Installation). Lens also provides Pseudo Distributed mode. [Lens pseudo-distributed setup](http://lens.apache.org/lenshome/pseudo-distributed-setup.html) is done by using [docker](https://www.docker.com/). Hive server and hadoop daemons are run as separate processes in lens pseudo-distributed setup.
-3. Now, you can start lens server (or stop).
-
-```bash
-./bin/lens-ctl start # (or stop)
-```
-
-## Configuring Lens Interpreter
-At the "Interpreters" menu, you can edit Lens interpreter or create new one. Zeppelin provides these properties for Lens.
-
-
-
-
Property Name
-
value
-
Description
-
-
-
lens.client.dbname
-
default
-
The database schema name
-
-
-
lens.query.enable.persistent.resultset
-
false
-
Whether to enable persistent resultset for queries. When enabled, server will fetch results from driver, custom format them if any and store in a configured location. The file name of query output is queryhandle-id, with configured extensions
-
-
-
lens.server.base.url
-
http://hostname:port/lensapi
-
The base url for the lens server. you have to edit "hostname" and "port" that you may use(ex. http://0.0.0.0:9999/lensapi)
-
-
-
lens.session.cluster.user
-
default
-
Hadoop cluster username
-
-
-
zeppelin.lens.maxResult
-
1000
-
Max number of rows to display
-
-
-
zeppelin.lens.maxThreads
-
10
-
If concurrency is true then how many threads?
-
-
-
zeppelin.lens.run.concurrent
-
true
-
Run concurrent Lens Sessions
-
-
-
xxx
-
yyy
-
anything else from [Configuring lens server](https://lens.apache.org/admin/config-server.html)
-
-
-
-![Apache Lens Interpreter Setting]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-interpreter-setting.png)
-
-### Interpreter Binding for Zeppelin Notebook
-After configuring Lens interpreter, create your own notebook, then you can bind interpreters like below image.
-
-![Zeppelin Notebook Interpreter Binding]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-interpreter-binding.png)
-
-For more interpreter binding information see [here](../usage/interpreter/overview.html#interpreter-binding-mode).
-
-### How to use
-You can analyze your data by using [OLAP Cube](http://lens.apache.org/user/olap-cube.html) [QL](http://lens.apache.org/user/cli.html) which is a high level SQL like language to query and describe data sets organized in data cubes.
-You may experience OLAP Cube like this [Video tutorial](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples).
-As you can see in this video, they are using Lens Client Shell(`./bin/lens-cli.sh`). All of these functions also can be used on Zeppelin by using Lens interpreter.
-
-
Create and Use (Switch) Databases.
-
-```sql
-create database newDb
-```
-
-```
-use newDb
-```
-
-
Add partitions to Dimtable and Fact.
-
-```
-dimtable add single-partition --dimtable_name customer_table --storage_name local
---path your/path/to/lens/client/examples/resources/customer-local-part.xml
-```
-
-```
-fact add partitions --fact_name sales_raw_fact --storage_name local
---path your/path/to/lens/client/examples/resources/sales-raw-local-parts.xml
-```
-
-
Now, you can run queries on cubes.
-
-```
-query execute cube select customer_city_name, product_details.description,
-product_details.category, product_details.color, store_sales from sales
-where time_range_in(delivery_time, '2015-04-11-00', '2015-04-13-00')
-```
-
-![Lens Query Result]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-result.png)
-
-These are just examples that provided in advance by Lens. If you want to explore whole tutorials of Lens, see the [tutorial video](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples).
-
-## Lens UI Service
-Lens also provides web UI service. Once the server starts up, you can open the service on http://serverhost:19999/index.html and browse. You may also check the structure that you made and use query easily here.
-
-![Lens UI Service]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-ui-service.png)
diff --git a/docs/interpreter/markdown.md b/docs/interpreter/markdown.md
index e06c563e47a..a9c830652db 100644
--- a/docs/interpreter/markdown.md
+++ b/docs/interpreter/markdown.md
@@ -25,11 +25,11 @@ limitations under the License.
## Overview
[Markdown](http://daringfireball.net/projects/markdown/) is a plain text formatting syntax designed so that it can be converted to HTML.
-Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java), [pegdown](https://github.com/sirthias/pegdown) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers.
+Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers.
In Zeppelin notebook, you can use ` %md ` in the beginning of a paragraph to invoke the Markdown interpreter and generate static html from Markdown plain text.
-In Zeppelin, Markdown interpreter is enabled by default and uses the [pegdown](https://github.com/sirthias/pegdown) parser.
+In Zeppelin, Markdown interpreter is enabled by default and uses the [flexmark](https://github.com/vsch/flexmark-java) parser.
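+
+For example, a minimal sketch of a Markdown paragraph (the text below is illustrative, not taken from the original docs):
+
+```
+%md
+
+## Hello Zeppelin
+
+This **Markdown** text is rendered to static HTML by the flexmark parser.
+```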
@@ -54,7 +54,7 @@ For more information, please see [Mathematical Expression](../usage/display_syst
markdown.parser.type
flexmark
-
Markdown Parser Type. Available values: flexmark, pegdown, markdown4j.
+
Markdown Parser Type. Available values: flexmark, markdown4j.
@@ -68,13 +68,8 @@ CommonMark/Markdown Java parser with source level AST.
-### Pegdown Parser
-
-`pegdown` parser provides github flavored markdown. Although still one of the most popular Markdown parsing libraries for the JVM, pegdown has reached its end of life.
-The project is essentially unmaintained with tickets piling up and crucial bugs not being fixed.`pegdown`'s parsing performance isn't great. But keep this parser for the backward compatibility.
-
### Markdown4j Parser
-Since `pegdown` parser is more accurate and provides much more markdown syntax `markdown4j` option might be removed later. But keep this parser for the backward compatibility.
+Since the `flexmark` parser is more accurate and supports much more Markdown syntax, the `markdown4j` option might be removed later. It is kept only for backward compatibility.
diff --git a/docs/interpreter/pig.md b/docs/interpreter/pig.md
deleted file mode 100644
index e640b34852e..00000000000
--- a/docs/interpreter/pig.md
+++ /dev/null
@@ -1,190 +0,0 @@
----
-layout: page
-title: "Pig Interpreter for Apache Zeppelin"
-description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
-group: manual
----
-
-{% include JB/setup %}
-
-
-# Pig Interpreter for Apache Zeppelin
-
-
-
-## Overview
-[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of
-a high-level language for expressing data analysis programs,
-coupled with infrastructure for evaluating these programs.
-The salient property of Pig programs is that their structure is amenable to substantial parallelization,
-which in turns enables them to handle very large data sets.
-
-## Supported interpreter type
- - `%pig.script` (default Pig interpreter, so you can use `%pig`)
-
- `%pig.script` is like the Pig grunt shell. Anything you can run in Pig grunt shell can be run in `%pig.script` interpreter, it is used for running Pig script where you don’t need to visualize the data, it is suitable for data munging.
-
- - `%pig.query`
-
- `%pig.query` is a little different compared with `%pig.script`. It is used for exploratory data analysis via Pig latin where you can leverage Zeppelin’s visualization ability. There're 2 minor differences in the last statement between `%pig.script` and `%pig.query`
- - No pig alias in the last statement in `%pig.query` (read the examples below).
- - The last statement must be in single line in `%pig.query`
-
-
-## How to use
-
-### How to setup Pig execution modes.
-
-- Local Mode
-
- Set `zeppelin.pig.execType` as `local`.
-
-- MapReduce Mode
-
- Set `zeppelin.pig.execType` as `mapreduce`. HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
-
-- Tez Local Mode
-
- Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez_local`.
-
-- Tez Mode
-
- Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez`. HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
-
-- Spark Local Mode
-
- Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark_local`.
-
-- Spark Mode
-
- Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark`. For now, only yarn-client mode is supported. To enable it, you need to set property `SPARK_MASTER` to yarn-client and set `SPARK_JAR` to the spark assembly jar.
-
-### How to choose custom Spark Version
-
-By default, Pig Interpreter would use Spark 1.6.3 built with scala 2.10, if you want to use another spark version or scala version,
-you need to rebuild Zeppelin by specifying the custom Spark version via -Dpig.spark.version= and scala version via -Dpig.scala.version= in the maven build command.
-
-### How to configure interpreter
-
-At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default.
-And you can set any Pig properties here which will be passed to Pig engine. (like tez.queue.name & mapred.job.queue.name).
-Besides, we use paragraph title as job name if it exists, else use the last line of Pig script.
-So you can use that to find app running in YARN RM UI.
-
-
-
-
Property
-
Default
-
Description
-
-
-
zeppelin.pig.execType
-
mapreduce
-
Execution mode for pig runtime. local | mapreduce | tez_local | tez | spark_local | spark
-
-
-
zeppelin.pig.includeJobStats
-
false
-
whether display jobStats info in %pig.script
-
-
-
zeppelin.pig.maxResult
-
1000
-
max row number displayed in %pig.query
-
-
-
tez.queue.name
-
default
-
queue name for tez engine
-
-
-
mapred.job.queue.name
-
default
-
queue name for mapreduce engine
-
-
-
SPARK_MASTER
-
local
-
local | yarn-client
-
-
-
SPARK_JAR
-
-
The spark assembly jar, both jar in local or hdfs is supported. Put it on hdfs could have
- performance benefit
-
-
-
-### Example
-
-##### pig
-
-```
-%pig
-
-bankText = load 'bank.csv' using PigStorage(';');
-bank = foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance;
-bank = filter bank by age != '"age"';
-bank = foreach bank generate (int)age, REPLACE(job,'"','') as job, REPLACE(marital, '"', '') as marital, (int)(REPLACE(balance, '"', '')) as balance;
-store bank into 'clean_bank.csv' using PigStorage(';'); -- this statement is optional, it just show you that most of time %pig.script is used for data munging before querying the data.
-```
-
-##### pig.query
-
-Get the number of each age where age is less than 30
-
-```
-%pig.query
-
-bank_data = filter bank by age < 30;
-b = group bank_data by age;
-foreach b generate group, COUNT($1);
-```
-
-The same as above, but use dynamic text form so that use can specify the variable maxAge in textbox.
-(See screenshot below). Dynamic form is a very cool feature of Zeppelin, you can refer this [link]((../usage/dynamic_form/intro.html)) for details.
-
-```
-%pig.query
-
-bank_data = filter bank by age < ${maxAge=40};
-b = group bank_data by age;
-foreach b generate group, COUNT($1) as count;
-```
-
-Get the number of each age for specific marital type,
-also use dynamic form here. User can choose the marital type in the dropdown list (see screenshot below).
-
-```
-%pig.query
-
-bank_data = filter bank by marital=='${marital=single,single|divorced|married}';
-b = group bank_data by age;
-foreach b generate group, COUNT($1) as count;
-```
-
-The above examples are in the Pig tutorial note in Zeppelin, you can check that for details. Here's the screenshot.
-
-
-
-
-Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`,
-and do different kinds of query based on the data of `%pig`.
-Besides, we recommend you to specify alias explicitly so that the visualization can display
-the column name correctly. In the above example 2 and 3 of `%pig.query`, we name `COUNT($1)` as `count`.
-If you don't do this, then we will name it using position.
-E.g. in the above first example of `%pig.query`, we will use `col_1` in chart to represent `COUNT($1)`.
-
-
diff --git a/docs/interpreter/python.md b/docs/interpreter/python.md
index 86fb1dbb627..07e37a7b67f 100644
--- a/docs/interpreter/python.md
+++ b/docs/interpreter/python.md
@@ -36,20 +36,61 @@ Zeppelin supports python language which is very popular in data analytics and ma
%python
PythonInterpreter
-
Vanilla python interpreter, with least dependencies, only python environment installed is required
+
Vanilla python interpreter, with the fewest dependencies; only a python environment is required. %python will use IPython if its prerequisites are met
%python.ipython
IPythonInterpreter
-
Provide more fancy python runtime via IPython, almost the same experience like Jupyter. It requires more things, but is the recommended interpreter for using python in Zeppelin, see below
+
Provides a richer python runtime via IPython, with almost the same experience as Jupyter. It requires more dependencies, but is the recommended interpreter for using python in Zeppelin; see below for more details
%python.sql
PythonInterpreterPandasSql
-
Provide sql capability to query data in Pandas DataFrame via pandasql
+
Provides sql capability to query data in Pandas DataFrames via pandasql; it can access dataframes defined in %python
+## Main Features
+
+
+
+
Feature
+
Description
+
+
+
Support vanilla Python and IPython
+
Vanilla Python only requires a python installation; IPython provides almost the same user experience as Jupyter, such as inline plotting, code completion and magic methods.
+
+
+
Built-in ZeppelinContext Support
+
You can use ZeppelinContext to visualize pandas dataframes
+
+
+
Support SQL on Pandas dataframe
+
You can use SQL to query dataframes defined in Python
+
+
+
Run Python in yarn cluster with customized Python runtime
+
You can run Python in a yarn cluster with a customized Python runtime, without users affecting each other
+
+
+
+## Play Python in Zeppelin docker
+
+For beginners, we suggest playing with Python in the Zeppelin docker image first.
+In the Zeppelin docker image, we have already installed
+miniconda and many [useful python libraries](https://github.com/apache/zeppelin/blob/branch-0.10/scripts/docker/zeppelin/bin/env_python_3_with_R.yml)
+including IPython's prerequisites, so `%python` will use IPython.
+
+Without any extra configuration, you can run most of the tutorial notes under the folder `Python Tutorial` directly.
+
+
+```bash
+docker run -u $(id -u) -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.10.0
+```
+
+After running the above command, you can open `http://localhost:8080` to play with Python in Zeppelin.
+
## Configuration
@@ -80,12 +121,13 @@ Zeppelin supports python language which is very popular in data analytics and ma
zeppelin.yarn.dist.archives
-
Comma separated list of archives to be extracted into the working directory of interpreter. e.g. You can specify conda pack archive files via this property in python's yarn mode. It could be either files in local filesystem or files on hadoop compatible file systems
+
Used for ipython in yarn mode. It is a general zeppelin interpreter configuration, not python specific. For the Python interpreter it is used
+ to specify the conda env archive file, which could be on the local filesystem or on a hadoop compatible file system.
zeppelin.interpreter.conda.env.name
-
conda environment name, aka the folder name in the working directory of interpreter
+
Used for ipython in yarn mode. The conda environment name, i.e. the folder name in the working directory of the interpreter yarn container.
@@ -143,29 +185,32 @@ z.show(plt, height='150px', fmt='svg')
-
## IPython Interpreter (`%python.ipython`) (recommended)
-IPython is more powerful than the vanilla python interpreter with extra functionality. You can use IPython with Python2 or Python3 which depends on which python you set in `zeppelin.python`.
+IPython is more powerful than the vanilla python interpreter thanks to its extra functionality, and is what we recommend instead of the vanilla python interpreter. You can use IPython with Python2 or Python3, depending on which python you set in `zeppelin.python`.
-For non-anaconda environment
+### Prerequisites
- **Prerequisites**
-
- - Jupyter `pip install jupyter`
- - grpcio `pip install grpcio`
- - protobuf `pip install protobuf`
+* For a non-anaconda environment, you need to install the following packages
-For anaconda environment (`zeppelin.python` points to the python under anaconda)
+```
+pip install jupyter
+pip install grpcio
+pip install protobuf
+```
+
+* For an anaconda environment (`zeppelin.python` points to the python under anaconda), you need to install the following packages
+
+```
+pip install grpcio
+pip install protobuf
+```
- **Prerequisites**
-
- - grpcio `pip install grpcio`
- - protobuf `pip install protobuf`
+Zeppelin checks the above prerequisites when `%python` is used: if the IPython prerequisites are met, `%python` uses the IPython interpreter;
+otherwise it falls back to the vanilla Python interpreter.
In addition to all the basic functions of the vanilla python interpreter, you can use all the IPython advanced features as you use it in Jupyter Notebook.
-
-e.g.
+Take a look at the tutorial notes `Python Tutorial/1. IPython Basic` and `Python Tutorial/2. IPython Visualization Tutorial` to see how to use IPython in Zeppelin.
### Use IPython magic
@@ -193,67 +238,76 @@ plt.figure()
plt.plot(data)
```
+### Run shell command
+
+```
+%python.ipython
+
+!pip install pandas
+```
+
### Colored text output
-
+
### More types of visualization
-e.g. IPython supports hvplot
+
+For example, you can use hvplot in the same way as in Jupyter. Take a look at the tutorial note `Python Tutorial/2. IPython Visualization Tutorial` for more visualization examples.
+
+
### Better code completion
-
+Pressing `Tab` gives you all the completion candidates, just like in Jupyter.
+
+
-By default, Zeppelin would use IPython in `%python` if IPython prerequisites are meet, otherwise it would use vanilla Python interpreter in `%python`.
-If you don't want to use IPython via `%python`, then you can set `zeppelin.python.useIPython` as `false` in interpreter setting.
+## Pandas Integration
-## Pandas integration
Apache Zeppelin [Table Display System](../usage/display_system/basic.html#table) provides built-in data visualization capabilities.
-Python interpreter leverages it to visualize Pandas DataFrames though similar `z.show()` API, same as with [Matplotlib integration](#matplotlib-integration).
+Python interpreter leverages it to visualize Pandas DataFrames via `z.show()` API.
-Example:
+For example:
-```python
-%python
+
-import pandas as pd
-rates = pd.read_csv("bank.csv", sep=";")
-z.show(rates)
-```
+By default, `z.show` only displays 1000 rows; you can configure `zeppelin.python.maxResult` to adjust the max number of rows.
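+
+For example, a minimal sketch (the `bank.csv` file is the one used in the Zeppelin tutorial notes; adjust the path for your setup):
+
+```python
+%python
+
+import pandas as pd
+
+# load a sample csv into a pandas DataFrame
+rates = pd.read_csv("bank.csv", sep=";")
+
+# render it with Zeppelin's table display system; output is capped by zeppelin.python.maxResult
+z.show(rates)
+```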
## SQL over Pandas DataFrames
There is a convenience `%python.sql` interpreter that matches Apache Spark experience in Zeppelin and
enables usage of SQL language to query [Pandas DataFrames](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) and
-visualization of results though built-in [Table Display System](../usage/display_system/basic.html#table).
+visualization of results through built-in [Table Display System](../usage/display_system/basic.html#table).
+`%python.sql` can access dataframes defined in `%python`.
- **Prerequisites**
+**Prerequisites**
- Pandas `pip install pandas`
- PandaSQL `pip install -U pandasql`
Here's one example:
- - first paragraph
+* first paragraph
```python
%python
-
import pandas as pd
rates = pd.read_csv("bank.csv", sep=";")
```
- - next paragraph
+* next paragraph
```sql
%python.sql
-
SELECT * FROM rates WHERE age < 40
```
+
+
+
## Using Zeppelin Dynamic Forms
You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/usage/dynamic_form/intro.html) inside your Python code.
@@ -357,24 +411,31 @@ Python interpreter create a variable `z` which represent `ZeppelinContext` for y
-## Run Python in yarn cluster
+## Run Python interpreter in yarn cluster
-Zeppelin supports to run python interpreter in yarn cluster which means the python interpreter runs in the yarn container.
+Zeppelin supports [running interpreters in a yarn cluster](../quickstart/yarn.html), which means the python interpreter can run in a yarn container.
This can achieve better multi-tenant for python interpreter especially when you already have a hadoop yarn cluster.
-But there's one critical problem to run python in yarn cluster: how to manage the python environment in yarn container. Because yarn cluster is a distributed cluster environemt
-which is composed many nodes, and your python interpreter can start in any node. It is not practical to manage python environment in each nodes.
+But there's one critical problem when running python in a yarn cluster: how to manage the python environment in the yarn container. Because a hadoop yarn cluster is a distributed environment
+composed of many nodes, your python interpreter can start on any node, and it is not practical to manage the python environment on each node beforehand.
So in order to run python in yarn cluster, we would suggest you to use conda to manage your python environment, and Zeppelin can ship your
-codna environment to yarn container, so that each python interpreter can has its own python environment.
+conda environment to the yarn container, so that each python interpreter can have its own python environment without affecting the others.
+
+The Python interpreter in yarn cluster mode only works with IPython, so make sure IPython's prerequisites are met and that the following packages are included in Step 1.
+
+* python
+* jupyter
+* grpcio
+* protobuf
### Step 1
-We would suggest you to use conda pack to create archives of conda environments, and ship it to yarn container. Otherwise python interpreter
-will use the python executable in PATH of yarn container.
+We suggest you use [conda-pack](https://conda.github.io/conda-pack/) to create an archive of the conda environment and ship it to the yarn container. Otherwise the python interpreter
+will use the python executable file in the PATH of the yarn container.
-Here's one example of yml file which could be used to generate a conda environment with python 3 and some useful python libraries.
+Here's one example of a yaml file which could be used to create a conda environment with python 3 and some useful python libraries.
-* Create yml file for conda environment, write the following content into file `env_python_3.yml`
+* Create a yaml file for the conda environment; write the following content into the file `python_3_env.yml`
```text
name: python_3_env
@@ -382,17 +443,15 @@ channels:
- conda-forge
- defaults
dependencies:
- - python=3.7
+ - python=3.9
+ - jupyter
+ - grpcio
+ - protobuf
- pycodestyle
- numpy
- pandas
- scipy
- - grpcio
- - protobuf
- - pandasql
- - ipython
- - ipykernel
- - jupyter_client
+ - pandasql
- panel
- pyyaml
- seaborn
@@ -407,11 +466,11 @@ dependencies:
```
-* Create conda environment via this yml file using either `conda` or `mamba`
+* Create the conda environment from this yaml file using either [conda](https://docs.conda.io/en/latest/) or [mamba](https://github.com/mamba-org/mamba)
```bash
-conda env create -f env_python_3.yml
+conda env create -f python_3_env.yml
```
```bash
@@ -420,28 +479,34 @@ mamba env create -f python_3_env
```
-* Pack the conda environment using either `conda`
+* Pack the conda environment using `conda`
```bash
-conda pack -n python_3
+conda pack -n python_3_env
```
### Step 2
-Specify the following properties to enable yarn mode for python interpreter, and specify the correct python environment.
+Specify the following properties to enable yarn mode for the python interpreter.
```
+%python.conf
+
zeppelin.interpreter.launcher yarn
-zeppelin.yarn.dist.archives /home/hadoop/python_3.tar.gz#environment
+zeppelin.yarn.dist.archives /home/hadoop/python_3_env.tar.gz#environment
zeppelin.interpreter.conda.env.name environment
```
+Setting `zeppelin.interpreter.launcher` to `yarn` will launch the python interpreter in the yarn cluster.
+
`zeppelin.yarn.dist.archives` is the python conda environment tar which is created in step 1.
This tar will be shipped to yarn container and untar in the working directory of yarn container.
-`environment` in `/home/hadoop/python_3.tar.gz#environment` is the folder name after untar. This folder name should be the same as `zeppelin.interpreter.conda.env.name`.
+`environment` in `/home/hadoop/python_3_env.tar.gz#environment` is the folder name after untarring.
-## Python environments (used for non-yarn mode)
+This folder name should be the same as `zeppelin.interpreter.conda.env.name`. Usually we name it `environment` here.
+
+## Python environments (used for vanilla python interpreter in non-yarn mode)
### Default
By default, PythonInterpreter will use python command defined in `zeppelin.python` property to run python process.
@@ -529,14 +594,6 @@ Here is an example
%python.docker activate gcr.io/tensorflow/tensorflow:latest
```
-## Technical description
-
-For in-depth technical details on current implementation please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md).
-
-
-## Some features not yet implemented in the vanilla Python interpreter
+## Community
-* Interrupt a paragraph execution (`cancel()` method) is currently only supported in Linux and MacOs.
-If interpreter runs in another operating system (for instance MS Windows) , interrupt a paragraph will close the whole interpreter.
-A JIRA ticket ([ZEPPELIN-893](https://issues.apache.org/jira/browse/ZEPPELIN-893)) is opened to implement this feature in a next release of the interpreter.
-* Progression bar in webUI (`getProgress()` method) is currently not implemented.
+[Join our community](http://zeppelin.apache.org/community.html) to discuss with others.
diff --git a/docs/interpreter/r.md b/docs/interpreter/r.md
index 2d39126fd4a..221f34e14e1 100644
--- a/docs/interpreter/r.md
+++ b/docs/interpreter/r.md
@@ -151,6 +151,26 @@ If you want to use R with Spark, it is almost the same via `%spark.r`, `%spark.i
+## Play R in Zeppelin docker
+
+For beginners, we suggest playing with R in the Zeppelin docker image first. In the Zeppelin docker image, we have already installed R and many useful R libraries, including IRKernel's prerequisites, so `%r.ir` is available.
+
+Without any extra configuration, you can run most of the tutorial notes under the folder `R Tutorial` directly.
+
+```
+docker run -u $(id -u) -p 8080:8080 -p 6789:6789 --rm --name zeppelin apache/zeppelin:0.10.0
+```
+
+After running the above command, you can open `http://localhost:8080` to play with R in Zeppelin.
+The port `6789` exposed in the above command is for the R shiny app. You need to set the following 2 properties to make the shiny app accessible as an iframe in the Zeppelin docker container, as in the sketch after the list below.
+
+* Set `zeppelin.R.shiny.portRange` to `6789:6789`
+* Set `ZEPPELIN_LOCAL_IP` to `0.0.0.0`
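+
+For example, a sketch of the docker command with the shiny port published and `ZEPPELIN_LOCAL_IP` set (the
+`zeppelin.R.shiny.portRange` property is then set to `6789:6789` in the interpreter setting page):
+
+```bash
+# publish the shiny port and bind Zeppelin to all interfaces inside the container
+docker run -u $(id -u) -p 8080:8080 -p 6789:6789 \
+  -e ZEPPELIN_LOCAL_IP=0.0.0.0 \
+  --rm --name zeppelin apache/zeppelin:0.10.0
+```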
+
+
+
+
+
## Interpreter binding mode
The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same R interpreter.
@@ -341,7 +361,7 @@ channels:
- conda-forge
- defaults
dependencies:
- - python=3.7
+ - python=3.9
- jupyter
- grpcio
- protobuf
diff --git a/docs/interpreter/sap.md b/docs/interpreter/sap.md
deleted file mode 100644
index 0cb3a3c905b..00000000000
--- a/docs/interpreter/sap.md
+++ /dev/null
@@ -1,169 +0,0 @@
----
-
-layout: page
-
-title: "SAP BusinessObjects Interpreter for Apache Zeppelin"
-
-description: "SAP BusinessObjects BI platform can simplify the lives of business users and IT staff. SAP BusinessObjects is based on universes. The universe contains dual-semantic layer model. The users make queries upon universes. This interpreter is new interface for universes."
-
-group: interpreter
-
----
-
-
-
-{% include JB/setup %}
-
-# SAP BusinessObjects (Universe) Interpreter for Apache Zeppelin
-
-
-
-## Overview
-
-[SAP BusinessObjects BI platform (universes)](https://help.sap.com/viewer/p/SAP_BUSINESSOBJECTS_BUSINESS_INTELLIGENCE_PLATFORM) can simplify the lives of business users and IT staff. SAP BusinessObjects is based on universes. The universe contains dual-semantic layer model. The users make queries upon universes. This interpreter is new interface for universes.
-
-*Disclaimer* SAP interpreter is not official interpreter for SAP BusinessObjects BI platform. It uses [BI Semantic Layer REST API](https://help.sap.com/viewer/5431204882b44fc98d56bd752e69f132/4.2.5/en-US/ec54808e6fdb101497906a7cb0e91070.html)
-
-This interpreter is not directly supported by SAP AG.
-
-Tested with versions 4.2SP3 (14.2.3.2220) and 4.2SP5. There is no support for filters in UNX-universes converted from old UNV format.
-
-The universe name must be unique.
-
-## Configuring SAP Universe Interpreter
-
-At the "Interpreters" menu, you can edit SAP interpreter or create new one. Zeppelin provides these properties for SAP.
-
-
-
-
Property Name
-
Value
-
Description
-
-
-
universe.api.url
-
http://localhost:6405/biprws
-
The base url for the SAP BusinessObjects BI platform. You have to edit "localhost" that you may use (ex. http://0.0.0.0:6405/biprws)
-
-
-
universe.authType
-
secEnterprise
-
The type of authentication for API of Universe. Available values: secEnterprise, secLDAP, secWinAD, secSAPR3
-
-
-
universe.password
-
-
The BI platform user password
-
-
-
universe.user
-
Administrator
-
The BI platform user login
-
-
-
-![SAP Interpreter Setting]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/sap-interpreter-setting.png)
-
-### How to use
-
-
Choose the universe
-
Choose dimensions and measures in `select` statement
-
Define conditions in `where` statement
-You can compare two dimensions/measures or use Filter (without value).
-Dimesions/Measures can be compared with static values, may be `is null` or `is not null`, contains or not in list.
-Available the nested conditions (using braces "()"). "and" operator have more priority than "or".
-
-
-If generated query contains promtps, then promtps will appear as dynamic form after paragraph submitting.
-
-#### Example query
-```sql
-%sap
-
-universe [Universe Name];
-
-select
-
- [Folder1].[Dimension2],
-
- [Folder2].[Dimension3],
-
- [Measure1]
-
-where
-
- [Filter1]
-
- and [Date] > '2018-01-01 00:00:00'
-
- and [Folder1].[Dimension4] is not null
-
- and [Folder1].[Dimension5] in ('Value1', 'Value2');
-```
-
-### `distinct` keyword
-You can write keyword `distinct` after keyword `select` to return only distinct (different) values.
-
-#### Example query
-```sql
-%sap
-universe [Universe Name];
-
-select distinct
- [Folder1].[Dimension2], [Measure1]
-where
- [Filter1];
-```
-
-### `limit` keyword
-You can write keyword `limit` and limit value in the end of query to limit the number of records returned based on a limit value.
-
-#### Example query
-```sql
-%sap
-universe [Universe Name];
-
-select
- [Folder1].[Dimension2], [Measure1]
-where
- [Filter1]
-limit 100;
-```
-
-## Object Interpolation
-The SAP interpreter also supports interpolation of `ZeppelinContext` objects into the paragraph text.
-To enable this feature set `universe.interpolation` to `true`. The following example shows one use of this facility:
-
-####In Scala cell:
-
-```scala
-z.put("curr_date", "2018-01-01 00:00:00")
-```
-
-####In later SAP cell:
-
-```sql
-where
- [Filter1]
- and [Date] > '{curr_date}'
-```
\ No newline at end of file
diff --git a/docs/interpreter/scalding.md b/docs/interpreter/scalding.md
deleted file mode 100644
index 02c5fb8b31f..00000000000
--- a/docs/interpreter/scalding.md
+++ /dev/null
@@ -1,168 +0,0 @@
----
-layout: page
-title: "Scalding Interpreter for Apache Zeppelin"
-description: "Scalding is an open source Scala library for writing MapReduce jobs."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Scalding Interpreter for Apache Zeppelin
-
-
-
-[Scalding](https://github.com/twitter/scalding) is an open source Scala library for writing MapReduce jobs.
-
-## Building the Scalding Interpreter
-You have to first build the Scalding interpreter by enable the **scalding** profile as follows:
-
-```bash
-mvn clean package -Pscalding -DskipTests
-```
-
-## Enabling the Scalding Interpreter
-In a notebook, to enable the **Scalding** interpreter, click on the **Gear** icon,select **Scalding**, and hit **Save**.
-
-
-
-## Configuring the Interpreter
-
-Scalding interpreter runs in two modes:
-
-* local
-* hdfs
-
-In the local mode, you can access files on the local server and scalding transformation are done locally.
-
-In hdfs mode you can access files in HDFS and scalding transformation are run as hadoop map-reduce jobs.
-
-Zeppelin comes with a pre-configured Scalding interpreter in local mode.
-
-To run the scalding interpreter in the hdfs mode you have to do the following:
-
-**Set the classpath with ZEPPELIN\_CLASSPATH\_OVERRIDES**
-
-In conf/zeppelin_env.sh, you have to set
-ZEPPELIN_CLASSPATH_OVERRIDES to the contents of 'hadoop classpath'
-and directories with custom jar files you need for your scalding commands.
-
-**Set arguments to the scalding repl**
-
-The default arguments are: `--local --repl`
-
-For hdfs mode you need to add: `--hdfs --repl`
-
-If you want to add custom jars, you need to add: `-libjars directory/*:directory/*`
-
-For reducer estimation, you need to add something like:
-`-Dscalding.reducer.estimator.classes=com.twitter.scalding.reducer_estimation.InputSizeReducerEstimator`
-
-**Set max.open.instances**
-
-If you want to control the maximum number of open interpreters, you have to select "scoped" interpreter for note
-option and set `max.open.instances` argument.
-
-## Testing the Interpreter
-
-### Local mode
-
-In example, by using the [Alice in Wonderland](https://gist.github.com/johnynek/a47699caa62f4f38a3e2) tutorial,
-we will count words (of course!), and plot a graph of the top 10 words in the book.
-
-```scala
-%scalding
-
-import scala.io.Source
-
-// Get the Alice in Wonderland book from gutenberg.org:
-val alice = Source.fromURL("http://www.gutenberg.org/files/11/11.txt").getLines
-val aliceLineNum = alice.zipWithIndex.toList
-val alicePipe = TypedPipe.from(aliceLineNum)
-
-// Now get a list of words for the book:
-val aliceWords = alicePipe.flatMap { case (text, _) => text.split("\\s+").toList }
-
-// Now lets add a count for each word:
-val aliceWithCount = aliceWords.filterNot(_.equals("")).map { word => (word, 1L) }
-
-// let's sum them for each word:
-val wordCount = aliceWithCount.group.sum
-
-print ("Here are the top 10 words\n")
-val top10 = wordCount
- .groupAll
- .sortBy { case (word, count) => -count }
- .take(10)
-top10.dump
-
-```
-```
-%scalding
-
-val table = "words\t count\n" + top10.toIterator.map{case (k, (word, count)) => s"$word\t$count"}.mkString("\n")
-print("%table " + table)
-
-```
-
-If you click on the icon for the pie chart, you should be able to see a chart like this:
-![Scalding - Pie - Chart]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/scalding-pie.png)
-
-
-### HDFS mode
-
-**Test mode**
-
-```
-%scalding
-mode
-```
-This command should print:
-
-```
-res4: com.twitter.scalding.Mode = Hdfs(true,Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml)
-```
-
-
-**Test HDFS read**
-
-```scala
-val testfile = TypedPipe.from(TextLine("/user/x/testfile"))
-testfile.dump
-```
-
-This command should print the contents of the hdfs file /user/x/testfile.
-
-**Test map-reduce job**
-
-```scala
-val testfile = TypedPipe.from(TextLine("/user/x/testfile"))
-val a = testfile.groupAll.size.values
-a.toList
-
-```
-
-This command should create a map reduce job.
-
-## Future Work
-* Better user feedback (hadoop url, progress updates)
-* Ability to cancel jobs
-* Ability to dynamically load jars without restarting the interpreter
-* Multiuser scalability (run scalding interpreters on different servers)
diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md
deleted file mode 100644
index cb8d1278ec0..00000000000
--- a/docs/interpreter/scio.md
+++ /dev/null
@@ -1,169 +0,0 @@
----
-layout: page
-title: "Scio Interpreter for Apache Zeppelin"
-description: "Scio is a Scala DSL for Apache Beam/Google Dataflow model."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Scio Interpreter for Apache Zeppelin
-
-
-
-## Overview
-Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/DataflowJavaSDK) and [Apache Beam](http://beam.incubator.apache.org/) inspired by [Spark](http://spark.apache.org/) and [Scalding](https://github.com/twitter/scalding). See the current [wiki](https://github.com/spotify/scio/wiki) and [API documentation](http://spotify.github.io/scio/) for more information.
-
-## Configuration
-
-
-
Name
-
Default Value
-
Description
-
-
-
zeppelin.scio.argz
-
--runner=InProcessPipelineRunner
-
Scio interpreter wide arguments. Documentation: https://github.com/spotify/scio/wiki#options and https://cloud.google.com/dataflow/pipelines/specifying-exec-params
-
-
-
zeppelin.scio.maxResult
-
1000
-
Max number of SCollection results to display
-
-
-
-
-## Enabling the Scio Interpreter
-
-In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **beam** (**beam.scio**).
-
-## Using the Scio Interpreter
-
-In a paragraph, use `%beam.scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`.
-
-```scala
-%beam.scio
-val (sc, args) = ContextAndArgs(argz)
-```
-
-Use `sc` context the way you would in a regular pipeline/REPL.
-
-Example:
-
-```scala
-%beam.scio
-val (sc, args) = ContextAndArgs(argz)
-sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay()
-```
-
-If you close Scio context, go ahead an create a new one using `ContextAndArgs`. Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. You can close Scio context much the same way as in Scio REPL, and use Zeppelin display helpers to synchronously close and display results - read more below.
-
-### Progress
-
-There can be only one paragraph running at once. There is no notion of overall progress, thus progress bar will show `0`.
-
-### SCollection display helpers
-
-Scio interpreter comes with display helpers to ease working with Zeppelin notebooks. Simply use `closeAndDisplay()` on `SCollection` to close context and display the results. The number of results is limited by `zeppelin.scio.maxResult` (by default 1000).
-
-Supported `SCollection` types:
-
- * Scio's typed BigQuery
- * Scala's Products (case classes, tuples)
- * Google BigQuery's TableRow
- * Apache Avro
- * All Scala's `AnyVal`
-
-#### Helper methods
-
-There are different helper methods for different objects. You can easily display results from `SCollection`, `Future[Tap]` and `Tap`.
-
-##### `SCollection` helper
-
-`SCollection` has `closeAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously close Scio context, and once available pull and display results.
-
-##### `Future[Tap]` helper
-
-`Future[Tap]` has `waitAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously wait for results, and once available pull and display results.
-
-##### `Tap` helper
-
-`Tap` has `display` Zeppelin helper method for types listed above. Use it to pull and display results.
-
-### Examples
-
-#### BigQuery example:
-
-```scala
-%beam.scio
-@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
- |FROM [bigquery-samples:airline_ontime_data.flights]
- |group by departure_airport
- |order by 2 desc
- |limit 10""".stripMargin) class Flights
-
-val (sc, args) = ContextAndArgs(argz)
-sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema)
-```
-
-#### BigQuery typed example:
-
-```scala
-%beam.scio
-@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
- |FROM [bigquery-samples:airline_ontime_data.flights]
- |group by departure_airport
- |order by 2 desc
- |limit 10""".stripMargin) class Flights
-
-val (sc, args) = ContextAndArgs(argz)
-sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay()
-```
-
-#### Avro example:
-
-```scala
-%beam.scio
-import com.spotify.data.ExampleAvro
-
-val (sc, args) = ContextAndArgs(argz)
-sc.avroFile[ExampleAvro]("gs:///tmp/my.avro").take(10).closeAndDisplay()
-```
-
-#### Avro example with a view schema:
-
-```scala
-%beam.scio
-import com.spotify.data.ExampleAvro
-import org.apache.avro.Schema
-
-val (sc, args) = ContextAndArgs(argz)
-val view = Schema.parse("""{"type":"record","name":"ExampleAvro","namespace":"com.spotify.data","fields":[{"name":"track","type":"string"}, {"name":"artist", "type":"string"}]}""")
-
-sc.avroFile[EndSongCleaned]("gs:///tmp/my.avro").take(10).closeAndDisplay(view)
-```
-
-### Google credentials
-
-Scio Interpreter will try to infer your Google Cloud credentials from its environment, it will take into the account:
-
- * `argz` interpreter settings ([doc](https://github.com/spotify/scio/wiki#options))
- * environment variable (`GOOGLE_APPLICATION_CREDENTIALS`)
- * gcloud configuration
-
-#### BigQuery macro credentials
-
-Currently BigQuery project for macro expansion is inferred using Google Dataflow's [DefaultProjectFactory().create()](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java#L187)
diff --git a/docs/interpreter/shell.md b/docs/interpreter/shell.md
index 631c1337354..865b9150fb4 100644
--- a/docs/interpreter/shell.md
+++ b/docs/interpreter/shell.md
@@ -79,6 +79,11 @@ At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property v
Internal and external IP mapping of zeppelin server
+
+
zeppelin.concurrency.max
+
10
+
Max concurrency of shell interpreter
+
## Example
diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index fd0356ded89..680ca054b3b 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -26,7 +26,7 @@ limitations under the License.
## Overview
[Apache Spark](http://spark.apache.org) is a fast and general-purpose cluster computing system.
It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs.
-Apache Spark is supported in Zeppelin with Spark interpreter group which consists of below six interpreters.
+Apache Spark is supported in Zeppelin with Spark interpreter group which consists of following interpreters.
@@ -52,20 +52,88 @@ Apache Spark is supported in Zeppelin with Spark interpreter group which consist
%spark.r
SparkRInterpreter
-
Provides an R environment with SparkR support
+
Provides a vanilla R environment with SparkR support
+
+
+
%spark.ir
+
SparkIRInterpreter
+
Provides an R environment with SparkR support based on Jupyter IRKernel
+
+
+
%spark.shiny
+
SparkShinyInterpreter
+
Used to create R shiny app with SparkR support
%spark.sql
SparkSQLInterpreter
Provides a SQL environment
+
+
+## Main Features
+
+
+
+
Feature
+
Description
+
+
+
Support multiple versions of Spark
+
You can run different versions of Spark in one Zeppelin instance
+
+
+
Support multiple versions of Scala
+
You can run Spark builds for different Scala versions (2.12/2.13) in one Zeppelin instance
+
+
+
Support multiple languages
+
Scala, SQL, Python and R are supported; besides that, you can also collaborate across languages, e.g. you can write a Scala UDF and use it in PySpark
+
+
+
Support multiple execution modes
+
Local | Standalone | Yarn | K8s
+
+
+
Interactive development
+
The interactive development user experience increases your productivity
+
+
-
%spark.kotlin
-
KotlinSparkInterpreter
-
Provides a Kotlin environment
+
Inline Visualization
+
You can visualize Spark Datasets/DataFrames via Python/R's plotting libraries, and you can even build a SparkR Shiny app in Zeppelin
+
+
+
+
Multi-tenancy
+
Multiple users can work in one Zeppelin instance without affecting each other.
+
+
+
+
Rest API Support
+
You can not only submit Spark jobs via the Zeppelin notebook UI, but also via its rest api (you can use Zeppelin as a Spark job server).
+## Play Spark in Zeppelin docker
+
+For beginners, we suggest playing with Spark in the Zeppelin docker image.
+In the Zeppelin docker image, we have already installed
+miniconda and many [useful python and R libraries](https://github.com/apache/zeppelin/blob/branch-0.10/scripts/docker/zeppelin/bin/env_python_3_with_R.yml)
+including IPython and IRkernel prerequisites, so `%spark.pyspark` will use IPython and `%spark.ir` is enabled.
+Without any extra configuration, you can run most of the tutorial notes under the folder `Spark Tutorial` directly.
+
+First you need to download Spark, because no Spark binary distribution is shipped with Zeppelin.
+e.g. Here we download Spark 3.1.2 to `/mnt/disk1/spark-3.1.2`,
+mount it into the Zeppelin docker container and run the following command to start the container.
+
+```bash
+docker run -u $(id -u) -p 8080:8080 -p 4040:4040 --rm -v /mnt/disk1/spark-3.1.2:/opt/spark -e SPARK_HOME=/opt/spark --name zeppelin apache/zeppelin:0.10.0
+```
+
+After running the above command, you can open `http://localhost:8080` to play with Spark in Zeppelin. We only verify Spark local mode in the Zeppelin docker image; other modes may not work due to network issues.
+`-p 4040:4040` exposes the Spark web ui, so that you can access it via `http://localhost:4040`.
+
## Configuration
The Spark interpreter can be configured with properties provided by Zeppelin.
You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to [Spark Available Properties](http://spark.apache.org/docs/latest/configuration.html#available-properties).
@@ -201,40 +269,35 @@ You can also set other Spark properties which are not listed in the table. For a
Overrides Spark UI default URL. Value should be a full URL (ex: http://{hostName}/{uniquePath}.
- In Kubernetes mode, value can be Jinja template string with 3 template variables 'PORT', 'SERVICE_NAME' and 'SERVICE_DOMAIN'.
- (ex: http://{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}})
+ In Kubernetes mode, value can be Jinja template string with 3 template variables PORT, {% raw %} SERVICE_NAME {% endraw %} and {% raw %} SERVICE_DOMAIN {% endraw %}.
+ (e.g.: {% raw %}http://{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}} {% endraw %}). In yarn mode, value could be a knox url with {% raw %} {{applicationId}} {% endraw %} as placeholder,
+ (e.g.: {% raw %}https://knox-server:8443/gateway/yarnui/yarn/proxy/{{applicationId}}/{% endraw %})
spark.webui.yarn.useProxy
false
-
whether use yarn proxy url as spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004
-
-
-
spark.repl.target
-
jvm-1.6
-
- Manually specifying the Java version of Spark Interpreter Scala REPL,Available options:
- scala-compile v2.10.7 to v2.11.12 supports "jvm-1.5, jvm-1.6, jvm-1.7 and jvm-1.8", and the default value is jvm-1.6.
- scala-compile v2.10.1 to v2.10.6 supports "jvm-1.5, jvm-1.6, jvm-1.7", and the default value is jvm-1.6.
- scala-compile v2.12.x defaults to jvm-1.8, and only supports jvm-1.8.
-
+
whether use yarn proxy url as Spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004
Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps.
-### Export SPARK_HOME
+* Set SPARK_HOME
+* Set master
+
+
+### Set SPARK_HOME
There are several options for setting `SPARK_HOME`.
* Set `SPARK_HOME` in `zeppelin-env.sh`
-* Set `SPARK_HOME` in Interpreter setting page
+* Set `SPARK_HOME` in interpreter setting page
* Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter)
-#### 1. Set `SPARK_HOME` in `zeppelin-env.sh`
+#### Set `SPARK_HOME` in `zeppelin-env.sh`
-If you work with only one version of spark, then you can set `SPARK_HOME` in `zeppelin-env.sh` because any setting in `zeppelin-env.sh` is globally applied.
+If you work with only one version of Spark, then you can set `SPARK_HOME` in `zeppelin-env.sh` because any setting in `zeppelin-env.sh` is globally applied.
e.g.
@@ -251,21 +314,14 @@ export HADOOP_CONF_DIR=/usr/lib/hadoop
```
-#### 2. Set `SPARK_HOME` in Interpreter setting page
+#### Set `SPARK_HOME` in interpreter setting page
-If you want to use multiple versions of spark, then you need create multiple spark interpreters and set `SPARK_HOME` for each of them. e.g.
-Create a new spark interpreter `spark24` for spark 2.4 and set `SPARK_HOME` in interpreter setting page
-
-
-
-
-Create a new spark interpreter `spark16` for spark 1.6 and set `SPARK_HOME` in interpreter setting page
-
-
-
+If you want to use multiple versions of Spark, then you need to create multiple Spark interpreters and set `SPARK_HOME` for each of them separately. e.g.
+create a new Spark interpreter `spark33` for Spark 3.3 and set its `SPARK_HOME` in the interpreter setting page,
+and create a new Spark interpreter `spark34` for Spark 3.4 and set its `SPARK_HOME` in the interpreter setting page.
-#### 3. Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter)
+#### Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter)
Besides setting `SPARK_HOME` in interpreter setting page, you can also use inline generic configuration to put the
configuration with code together for more flexibility. e.g.
@@ -273,23 +329,26 @@ configuration with code together for more flexibility. e.g.
-### Set master in Interpreter menu
-After starting Zeppelin, go to **Interpreter** menu and edit **spark.master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.
+### Set master
+
+After setting `SPARK_HOME`, you need to set the **spark.master** property either in the interpreter setting page or via inline configuration. The value may vary depending on your Spark cluster deployment type.
For example,
* **local[*]** in local mode
* **spark://master:7077** in standalone cluster
- * **yarn-client** in Yarn client mode (Not supported in spark 3.x, refer below for how to configure yarn-client in Spark 3.x)
- * **yarn-cluster** in Yarn cluster mode (Not supported in spark 3.x, refer below for how to configure yarn-client in Spark 3.x)
+ * **yarn-client** in Yarn client mode (Not supported in Spark 3.x, refer below for how to configure yarn-client in Spark 3.x)
+ * **yarn-cluster** in Yarn cluster mode (Not supported in Spark 3.x, refer below for how to configure yarn-cluster in Spark 3.x)
* **mesos://host:5050** in Mesos cluster
That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way.
For the further information about Spark & Zeppelin version compatibility, please refer to "Available Interpreters" section in [Zeppelin download page](https://zeppelin.apache.org/download.html).
-> Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile.
+Note that without setting `SPARK_HOME`, Zeppelin runs in local mode with the included version of Spark. The included version may vary depending on the build profile, and it has limited functionality, so it
+is always recommended to set `SPARK_HOME`.
-> Yarn client mode and local mode will run driver in the same machine with zeppelin server, this would be dangerous for production. Because it may run out of memory when there's many spark interpreters running at the same time. So we suggest you only allow yarn-cluster mode via setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`.
+Yarn client mode and local mode run the driver on the same machine as the zeppelin server, which is dangerous for production because it may run out of memory when many Spark interpreters are running at the same time. So we suggest you
+only allow yarn-cluster mode by setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`.
#### Configure yarn mode for Spark 3.x
@@ -314,77 +373,55 @@ Specifying `yarn-client` & `yarn-cluster` in `spark.master` is not supported in
-## SparkContext, SQLContext, SparkSession, ZeppelinContext
+## Interpreter binding mode
-SparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext`, `spark` and `z`, respectively, in Scala, Kotlin, Python and R environments.
+The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same Spark interpreter.
+So we recommend you use `isolated per note`, which means each note has its own Spark interpreter without affecting the others. But this may exhaust your machine's resources if too many
+Spark interpreters are created, so we recommend always using yarn-cluster mode in production if you run Spark in a hadoop cluster. You can use [inline configuration](../usage/interpreter/overview.html#inline-generic-configuration) via `%spark.conf` in the first paragraph to customize your spark configuration, as in the sketch below.
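+
+For example, a minimal sketch of such an inline configuration paragraph (the property values are illustrative):
+
+```
+%spark.conf
+
+SPARK_HOME /opt/spark
+spark.master yarn
+spark.submit.deployMode cluster
+spark.executor.memory 4g
+```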
-> Note that Scala/Python/R environment shares the same SparkContext, SQLContext, SparkSession and ZeppelinContext instance.
+You can also choose `scoped` mode. In `scoped` per note mode, Zeppelin creates a separate scala compiler/python shell for each note but shares a single `SparkContext/SqlContext/SparkSession`.
-## YARN Mode
-Zeppelin support both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify `SPARK_HOME` & `HADOOP_CONF_DIR`.
-Usually you only have one hadoop cluster, so you can set `HADOOP_CONF_DIR` in `zeppelin-env.sh` which is applied to all spark interpreters. If you want to use spark against multiple hadoop cluster, then you need to define
-`HADOOP_CONF_DIR` in interpreter setting or via inline generic configuration.
-## Dependency Management
+## SparkContext, SQLContext, SparkSession, ZeppelinContext
-For spark interpreter, it is not recommended to use Zeppelin's [Dependency Management](../usage/interpreter/dependency_management.html) for managing
-third party dependencies (`%spark.dep` is removed from Zeppelin 0.9 as well). Instead you should set the standard Spark properties.
+SparkContext, SparkSession and ZeppelinContext are automatically created and exposed as variable names `sc`, `spark` and `z` respectively, in Scala, Python and R environments.
-
-
-
Spark Property
-
Spark Submit Argument
-
Description
-
-
-
spark.files
-
--files
-
Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed.
-
-
-
spark.jars
-
--jars
-
Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.
-
-
-
spark.jars.packages
-
--packages
-
Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories.
-
-
-You can either set Spark properties in interpreter setting page or set Spark submit arguments in `zeppelin-env.sh` via environment variable `SPARK_SUBMIT_OPTIONS`.
-For examples:
+> Note that Scala/Python/R environment shares the same SparkContext, SQLContext, SparkSession and ZeppelinContext instance.
-```bash
-export SPARK_SUBMIT_OPTIONS="--files --jars --packages "
-```
+## Yarn Mode
+
+Zeppelin supports both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify `SPARK_HOME` & `HADOOP_CONF_DIR`.
+Usually you only have one hadoop cluster, so you can set `HADOOP_CONF_DIR` in `zeppelin-env.sh`, which is applied to all Spark interpreters. If you want to use Spark against multiple hadoop clusters, then you need to define
+`HADOOP_CONF_DIR` in the interpreter setting or via inline generic configuration.
+
+## K8s Mode
-But it is not recommended to set them in `SPARK_SUBMIT_OPTIONS`. Because it will be shared by all spark interpreters, which means you can not set different dependencies for different users.
+Regarding how to run Spark on K8s in Zeppelin, please check [this doc](../quickstart/kubernetes.html).
## PySpark
-There're 2 ways to use PySpark in Zeppelin:
+There are 2 ways to use PySpark in Zeppelin:
* Vanilla PySpark
* IPySpark
### Vanilla PySpark (Not Recommended)
-Vanilla PySpark interpreter is almost the same as vanilla Python interpreter except Zeppelin inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`.
-By default, Zeppelin would use IPython in `%spark.pyspark` when IPython is available, Otherwise it would fall back to the original PySpark implementation.
-If you don't want to use IPython, then you can set `zeppelin.pyspark.useIPython` as `false` in interpreter setting. For the IPython features, you can refer doc
-[Python Interpreter](python.html)
+The vanilla PySpark interpreter is almost the same as the vanilla Python interpreter, except that the Spark interpreter injects SparkContext, SQLContext and SparkSession via the variables `sc`, `sqlContext` and `spark`.
+
+By default, Zeppelin uses IPython in `%spark.pyspark` when IPython is available (Zeppelin checks whether ipython's prerequisites are met); otherwise it falls back to the vanilla PySpark implementation.
### IPySpark (Recommended)
-You can use `IPySpark` explicitly via `%spark.ipyspark`. IPySpark interpreter is almost the same as IPython interpreter except Zeppelin inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`.
-For the IPython features, you can refer doc [Python Interpreter](python.html)
+
+You can use `IPySpark` explicitly via `%spark.ipyspark`. The IPySpark interpreter is almost the same as the IPython interpreter, except that the Spark interpreter injects SparkContext, SQLContext and SparkSession via the variables `sc`, `sqlContext` and `spark`.
+For the IPython features, you can refer to the doc [Python Interpreter](python.html#ipython-interpreter-pythonipython-recommended).
## SparkR
-Zeppelin support SparkR via `%spark.r`. Here's configuration for SparkR Interpreter.
+Zeppelin supports SparkR via `%spark.r`, `%spark.ir` and `%spark.shiny`. Here's the configuration for the SparkR Interpreter.
@@ -412,12 +449,28 @@ Zeppelin support SparkR via `%spark.r`. Here's configuration for SparkR Interpre
A Shiny app would launch a web app at some port; this property specifies the port range in the format 'start:end', e.g. '5000:5001'. By default it is ':', which means any port
+
+Refer [R doc](r.html) for how to use R in Zeppelin.
## SparkSql
-Spark Sql Interpreter share the same SparkContext/SparkSession with other Spark interpreter. That means any table registered in scala, python or r code can be accessed by Spark Sql.
+The Spark SQL interpreter shares the same SparkContext/SparkSession with other Spark interpreters. That means any table registered in Scala, Python or R code can be accessed by Spark SQL.
For examples:
```scala
@@ -435,11 +488,13 @@ df.createOrReplaceTempView("people")
select * from people
```
-By default, each sql statement would run sequentially in `%spark.sql`. But you can run them concurrently by following setup.
+You can write multiple SQL statements in one paragraph, separated by semicolons.
+SQL statements in one paragraph run sequentially.
+But SQL statements in different paragraphs can run concurrently with the following configuration.
-1. Set `zeppelin.spark.concurrentSQL` to true to enable the sql concurrent feature, underneath zeppelin will change to use fairscheduler for spark. And also set `zeppelin.spark.concurrentSQL.max` to control the max number of sql statements running concurrently.
+1. Set `zeppelin.spark.concurrentSQL` to `true` to enable concurrent SQL execution; under the hood Zeppelin switches to the fair scheduler for Spark. Also set `zeppelin.spark.concurrentSQL.max` to control the maximum number of SQL statements running concurrently.
2. Configure pools by creating `fairscheduler.xml` under your `SPARK_CONF_DIR`, check the official spark doc [Configuring Pool Properties](http://spark.apache.org/docs/latest/job-scheduling.html#configuring-pool-properties)
-3. Set pool property via setting paragraph property. e.g.
+3. Set the pool via a paragraph local property, e.g.
```
%spark(pool=pool1)
@@ -448,25 +503,61 @@ By default, each sql statement would run sequentially in `%spark.sql`. But you c
```
This pool feature is also available for all versions of scala Spark, PySpark. For SparkR, it is only available starting from 2.3.0.
-
-## Interpreter Setting Option
-You can choose one of `shared`, `scoped` and `isolated` options when you configure Spark interpreter.
-e.g.
+## Dependency Management
+
+For the Spark interpreter, it is not recommended to use Zeppelin's [Dependency Management](../usage/interpreter/dependency_management.html) for managing
+third party dependencies (`%spark.dep` is removed from Zeppelin 0.9 as well). Instead, you should set the standard Spark properties as follows:
+
+<table class="table-configuration">
+  <tr>
+    <th>Spark Property</th>
+    <th>Spark Submit Argument</th>
+    <th>Description</th>
+  </tr>
+  <tr>
+    <td>spark.files</td>
+    <td>--files</td>
+    <td>Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed.</td>
+  </tr>
+  <tr>
+    <td>spark.jars</td>
+    <td>--jars</td>
+    <td>Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.</td>
+  </tr>
+  <tr>
+    <td>spark.jars.packages</td>
+    <td>--packages</td>
+    <td>Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories.</td>
+  </tr>
+</table>
+As these are general Spark properties, you can set them via inline configuration, the interpreter setting page, or in `zeppelin-env.sh` via the environment variable `SPARK_SUBMIT_OPTIONS`.
+For example:
+
+```bash
+export SPARK_SUBMIT_OPTIONS="--files <comma-separated list of files> --jars <comma-separated list of jars> --packages <comma-separated list of maven coordinates>"
+```
+
+Note that `SPARK_SUBMIT_OPTIONS` is deprecated and will be removed in a future release.
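+
+For example, the same kind of dependency can instead be declared per note via the inline configuration mentioned above (a minimal sketch; the Maven coordinate below is only an illustration):
+
+```
+%spark.conf
+spark.jars.packages  org.apache.commons:commons-lang3:3.12.0
+```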
-* In `scoped` per user mode, Zeppelin creates separated Scala compiler for each user but share a single SparkContext.
-* In `isolated` per user mode, Zeppelin creates separated SparkContext for each user.
## ZeppelinContext
+
Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities.
-See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details.
+See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. For the Spark interpreter, you can use `z` to display a Spark `Dataset/DataFrame`.
+
+
+
+
## Setting up Zeppelin with Kerberos
+
Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:
-There're several ways to make spark work with kerberos enabled hadoop cluster in Zeppelin.
+There are several ways to make Spark work with a kerberos-enabled hadoop cluster in Zeppelin.
1. Share one single hadoop cluster.
In this case you just need to specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` in zeppelin-site.xml, Spark interpreter will use these setting by default.
@@ -474,11 +565,26 @@ In this case you just need to specify `zeppelin.server.kerberos.keytab` and `zep
2. Work with multiple hadoop clusters.
In this case you can specify `spark.yarn.keytab` and `spark.yarn.principal` to override `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal`.
+### Configuration Setup
+
+1. On the server where Zeppelin is installed, install the Kerberos client modules and configuration (krb5.conf).
+   This is to make the server communicate with the KDC.
+
+2. Add the two properties below to Spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`):
+
+ ```
+ spark.yarn.principal
+ spark.yarn.keytab
+ ```
+
+> **NOTE:** If you do not have permission to access the above spark-defaults.conf file, you can optionally add the above lines to the Spark interpreter setting through the Interpreter tab in the Zeppelin UI. A minimal example of these two properties is sketched after this list.
+
+3. That's it. Play with Zeppelin!
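+
+A minimal sketch of step 2 as shell commands (the principal and keytab path below are placeholders, not values from this guide):
+
+```bash
+# append the two Kerberos properties to spark-defaults.conf
+cat >> "$SPARK_HOME/conf/spark-defaults.conf" <<'EOF'
+spark.yarn.principal  zeppelin@EXAMPLE.COM
+spark.yarn.keytab     /etc/security/keytabs/zeppelin.keytab
+EOF
+```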
## User Impersonation
-In yarn mode, the user who launch the zeppelin server will be used to launch the spark yarn application. This is not a good practise.
-Most of time, you will enable shiro in Zeppelin and would like to use the login user to submit the spark yarn app. For this purpose,
+In yarn mode, the user who launches the zeppelin server is used to launch the Spark yarn application. This is not a good practice.
+Most of the time, you will enable shiro in Zeppelin and want to use the login user to submit the Spark yarn app. For this purpose,
you need to enable user impersonation for more security control. To enable user impersonation, you need to do the following steps.
**Step 1** Enable the user impersonation setting in hadoop's `core-site.xml`. E.g. if you are using user `zeppelin` to launch Zeppelin, then add the following to `core-site.xml`, then restart both hdfs and yarn.
@@ -500,27 +606,6 @@ you need to enable user impersonation for more security control. In order the en
**Step 3(Optional)** If you are using kerberos cluster, then you need to set `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` to the user(aka. user in Step 1) you want to
impersonate in `zeppelin-site.xml`.
+## Community
-
-## Deprecate Spark 2.2 and earlier versions
-Starting from 0.9, Zeppelin deprecate Spark 2.2 and earlier versions. So you will see a warning message when you use Spark 2.2 and earlier.
-You can get rid of this message by setting `zeppelin.spark.deprecatedMsg.show` to `false`.
-
-
-
-### Configuration Setup
-
-1. On the server that Zeppelin is installed, install Kerberos client modules and configuration, krb5.conf.
-This is to make the server communicate with KDC.
-
-2. Add the two properties below to Spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`):
-
- ```
- spark.yarn.principal
- spark.yarn.keytab
- ```
-
- > **NOTE:** If you do not have permission to access for the above spark-defaults.conf file, optionally, you can add the above lines to the Spark Interpreter setting through the Interpreter tab in the Zeppelin UI.
-
-3. That's it. Play with Zeppelin!
-
+[Join our community](http://zeppelin.apache.org/community.html) to discuss with others.
diff --git a/docs/interpreter/submarine.md b/docs/interpreter/submarine.md
deleted file mode 100644
index 97be1bb7e02..00000000000
--- a/docs/interpreter/submarine.md
+++ /dev/null
@@ -1,407 +0,0 @@
----
-layout: page
-title: "Apache Hadoop Submarine Interpreter for Apache Zeppelin"
-description: "Hadoop Submarine is the latest machine learning framework subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, MXNet, Caffe, Spark, etc."
-group: interpreter
----
-
-{% include JB/setup %}
-
-# Submarine Interpreter for Apache Zeppelin
-
-
-
-[Hadoop Submarine ](https://hadoop.apache.org/submarine/) is the latest machine learning framework subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, MXNet, Caffe, Spark, etc. A variety of deep learning frameworks provide a full-featured system framework for machine learning algorithm development, distributed model training, model management, and model publishing, combined with hadoop's intrinsic data storage and data processing capabilities to enable data scientists to Good mining and the value of the data.
-
-A deep learning algorithm project requires data acquisition, data processing, data cleaning, interactive visual programming adjustment parameters, algorithm testing, algorithm publishing, algorithm job scheduling, offline model training, model online services and many other processes and processes. Zeppelin is a web-based notebook that supports interactive data analysis. You can use SQL, Scala, Python, etc. to make data-driven, interactive, collaborative documents.
-
-You can use the more than 20 interpreters in zeppelin (for example: spark, hive, Cassandra, Elasticsearch, Kylin, HBase, etc.) to collect data, clean data, feature extraction, etc. in the data in Hadoop before completing the machine learning model training. The data preprocessing process.
-
-By integrating submarine in zeppelin, we use zeppelin's data discovery, data analysis and data visualization and collaboration capabilities to visualize the results of algorithm development and parameter adjustment during machine learning model training.
-
-## Architecture
-
-
-
-As shown in the figure above, how the Submarine develops and models the machine learning algorithms through Zeppelin is explained from the system architecture.
-
-After installing and deploying Hadoop 3.1+ and Zeppelin, submarine will create a fully separate Zeppelin Submarine interpreter Docker container for each user in YARN. This container contains the development and runtime environment for Tensorflow. Zeppelin Server connects to the Zeppelin Submarine interpreter Docker container in YARN. allows algorithmic engineers to perform algorithm development and data visualization in Tensorflow's stand-alone environment in Zeppelin Notebook.
-
-After the algorithm is developed, the algorithm engineer can submit the algorithm directly to the YARN in offline transfer training in Zeppelin, real-time demonstration of model training with Submarine's TensorBoard for each algorithm engineer.
-
-You can not only complete the model training of the algorithm, but you can also use the more than twenty interpreters in Zeppelin. Complete the data preprocessing of the model, For example, you can perform data extraction, filtering, and feature extraction through the Spark interpreter in Zeppelin in the Algorithm Note.
-
-In the future, you can also use Zeppelin's upcoming Workflow workflow orchestration service. You can complete Spark, Hive data processing and Tensorflow model training in one Note. It is organized into a workflow through visualization, etc., and the scheduling of jobs is performed in the production environment.
-
-## Overview
-
-
-
-As shown in the figure above, from the internal implementation, how Submarine combines Zeppelin's machine learning algorithm development and model training.
-
-1. The algorithm engineer created a Tensorflow notebook (left image) in Zeppelin by using Submarine interpreter.
-
- It is important to note that you need to complete the development of the entire algorithm in a Note.
-
-2. You can use Spark for data preprocessing in some of the paragraphs in Note.
-
-3. Use Python for algorithm development and debugging of Tensorflow in other paragraphs of notebook, Submarine creates a Zeppelin Submarine Interpreter Docker Container for you in YARN, which contains the following features and services:
-
- + **Shell Command line tool**:Allows you to view the system environment in the Zeppelin Submarine Interpreter Docker Container, Install the extension tools you need or the Python dependencies.
- + **Kerberos lib**:Allows you to perform kerberos authentication and access to Hadoop clusters with Kerberos authentication enabled.
- + **Tensorflow environment**:Allows you to develop tensorflow algorithm code.
- + **Python environment**:Allows you to develop tensorflow code.
- + Complete a complete algorithm development with a Note in Zeppelin. If this algorithm contains multiple modules, You can write different algorithm modules in multiple paragraphs in Note. The title of each paragraph is the name of the algorithm module. The content of the paragraph is the code content of this algorithm module.
- + **HDFS Client**:Zeppelin Submarine Interpreter will automatically submit the algorithm code you wrote in Note to HDFS.
-
- **Submarine interpreter Docker Image** It is Submarine that provides you with an image file that supports Tensorflow (CPU and GPU versions).
-And installed the algorithm library commonly used by Python.
-You can also install other development dependencies you need on top of the base image provided by Submarine.
-
-4. When you complete the development of the algorithm module, You can do this by creating a new paragraph in Note and typing `%submarine dashboard`. Zeppelin will create a Submarine Dashboard. The machine learning algorithm written in this Note can be submitted to YARN as a JOB by selecting the `JOB RUN` command option in the Control Panel. Create a Tensorflow Model Training Docker Container, The container contains the following sections:
-
- + Tensorflow environment
- + HDFS Client Will automatically download the algorithm file Mount from HDFS into the container for distributed model training. Mount the algorithm file to the Work Dir path of the container.
-
- **Submarine Tensorflow Docker Image** There is Submarine that provides you with an image file that supports Tensorflow (CPU and GPU versions). And installed the algorithm library commonly used by Python. You can also install other development dependencies you need on top of the base image provided by Submarine.
-
-
-
-
Name
-
Class
-
Description
-
-
-
%submarine
-
SubmarineInterpreter
-
Provides interpreter for Apache Submarine dashboard
-
-
-
%submarine.sh
-
SubmarineShellInterpreter
-
Provides interpreter for Apache Submarine shell
-
-
-
%submarine.python
-
PySubmarineInterpreter
-
Provides interpreter for Apache Submarine python
-
-
-
-### Submarine shell
-
-After creating a Note with Submarine Interpreter in Zeppelin, You can add a paragraph to Note if you need it. Using the %submarine.sh identifier, you can use the Shell command to perform various operations on the Submarine Interpreter Docker Container, such as:
-
-1. View the Pythone version in the Container
-2. View the system environment of the Container
-3. Install the dependencies you need yourself
-4. Kerberos certification with kinit
-5. Use Hadoop in Container for HDFS operations, etc.
-
-### Submarine python
-
-You can add one or more paragraphs to Note. Write the algorithm module for Tensorflow in Python using the `%submarine.python` identifier.
-
-### Submarine Dashboard
-
-After writing the Tensorflow algorithm by using `%submarine.python`, You can add a paragraph to Note. Enter the %submarine dashboard and execute it. Zeppelin will create a Submarine Dashboard.
-
-
-
-With Submarine Dashboard you can do all the operational control of Submarine, for example:
-
-1. **Usage**:Display Submarine's command description to help developers locate problems.
-
-2. **Refresh**:Zeppelin will erase all your input in the Dashboard.
-
-3. **Tensorboard**:You will be redirected to the Tensorboard WEB system created by Submarine for each user. With Tensorboard you can view the real-time status of the Tensorflow model training in real time.
-
-4. **Command**
-
- + **JOB RUN**:Selecting `JOB RUN` will display the parameter input interface for submitting JOB.
-
-
-
-
-
Name
-
Description
-
-
-
Checkpoint Path/td>
-
Submarine sets up a separate Checkpoint path for each user's Note for Tensorflow training. Saved the training data for this Note history, Used to train the output of model data, Tensorboard uses the data in this path for model presentation. Users cannot modify it. For example: `hdfs://cluster1/...` , The environment variable name for Checkpoint Path is `%checkpoint_path%`, You can use `%checkpoint_path%` instead of the input value in Data Path in `PS Launch Cmd` and `Worker Launch Cmd`.
-
-
-
Input Path
-
The user specifies the data data directory of the Tensorflow algorithm. Only HDFS-enabled directories are supported. The environment variable name for Data Path is `%input_path%`, You can use `%input_path%` instead of the input value in Data Path in `PS Launch Cmd` and `Worker Launch Cmd`.
-
- + **JOB STOP**
-
- You can choose to execute the `JOB STOP` command. Stop a Tensorflow model training task that has been submitted and is running
-
- + **TENSORBOARD START**
-
- You can choose to execute the `TENSORBOARD START` command to create your TENSORBOARD Docker Container.
-
- + **TENSORBOARD STOP**
-
- You can choose to execute the `TENSORBOARD STOP` command to stop and destroy your TENSORBOARD Docker Container.
-
-5. **Run Command**:Execute the action command of your choice
-6. **Clean Chechkpoint**:Checking this option will clear the data in this Note's Checkpoint Path before each `JOB RUN` execution.
-
-### Configuration
-
-Zeppelin Submarine interpreter provides the following properties to customize the Submarine interpreter
-
-
-
-
Attribute name
-
Attribute value
-
Description
-
-
-
DOCKER_CONTAINER_TIME_ZONE
-
Etc/UTC
-
Set the time zone in the container |
-
-
-
DOCKER_HADOOP_HDFS_HOME
-
/hadoop-3.1-0
-
Hadoop path in the following 3 images(SUBMARINE_INTERPRETER_DOCKER_IMAGE、tf.parameter.services.docker.image、tf.worker.services.docker.image) |
-
-
-
DOCKER_JAVA_HOME
-
/opt/java
-
JAVA path in the following 3 images(SUBMARINE_INTERPRETER_DOCKER_IMAGE、tf.parameter.services.docker.image、tf.worker.services.docker.image) |
-
-
-
HADOOP_YARN_SUBMARINE_JAR
-
-
Path to the Submarine JAR package in the Hadoop-3.1+ release installed on the Zeppelin server |
-
-
-
INTERPRETER_LAUNCH_MODE
-
local/yarn
-
Run the Submarine interpreter instance in local or YARN local mainly for submarine interpreter development and debugging YARN mode for production environment |
-
-
-
SUBMARINE_HADOOP_CONF_DIR
-
-
Set the HADOOP-CONF path to support multiple Hadoop cluster environments
-
-
-
SUBMARINE_HADOOP_HOME
-
-
Hadoop-3.1+ above path installed on the Zeppelin server
-
-
-
SUBMARINE_HADOOP_KEYTAB
-
-
Keytab file path for a hadoop cluster with kerberos authentication turned on
-
-
-
SUBMARINE_HADOOP_PRINCIPAL
-
-
PRINCIPAL information for the keytab file of the hadoop cluster with kerberos authentication turned on
-
-
-
SUBMARINE_INTERPRETER_DOCKER_IMAGE
-
-
At INTERPRETER_LAUNCH_MODE=yarn, Submarine uses this image to create a Zeppelin Submarine interpreter container to create an algorithm development environment for the user. |
-
-
-
docker.container.network
-
-
YARN's Docker network name
-
-
-
machinelearing.distributed.enable
-
-
Whether to use the model training of the distributed mode JOB RUN submission
-
-
-
shell.command.timeout.millisecs
-
60000
-
Execute timeout settings for shell commands in the Submarine interpreter container
-
-
-
submarine.algorithm.hdfs.path
-
-
Save machine-based algorithms developed using Submarine interpreter to HDFS as files
-
-
-
submarine.yarn.queue
-
root.default
-
Submarine submits model training YARN queue name
-
-
-
tf.checkpoint.path
-
-
Tensorflow checkpoint path, Each user will create a user's checkpoint secondary path using the username under this path. Each algorithm submitted by the user will create a checkpoint three-level path using the note id (the user's Tensorboard uses the checkpoint data in this path for visual display)
-
-
-
tf.parameter.services.cpu
-
-
Number of CPU cores applied to Tensorflow parameter services when Submarine submits model distributed training
-
-
-
tf.parameter.services.docker.image
-
-
Submarine creates a mirror for Tensorflow parameter services when submitting model distributed training
-
-
-
tf.parameter.services.gpu
-
-
GPU cores applied to Tensorflow parameter services when Submarine submits model distributed training
-
-
-
tf.parameter.services.memory
-
2G
-
Memory resources requested by Tensorflow parameter services when Submarine submits model distributed training
-
-
-
tf.parameter.services.num
-
-
Number of Tensorflow parameter services used by Submarine to submit model distributed training
-
-
-
tf.tensorboard.enable
-
true
-
Create a separate Tensorboard for each user
-
-
-
tf.worker.services.cpu
-
-
Submarine submits model resources for Tensorflow worker services when submitting model training
-
-
-
tf.worker.services.docker.image
-
-
Submarine creates a mirror for Tensorflow worker services when submitting model distributed training
-
-
-
tf.worker.services.gpu
-
-
Submarine submits GPU resources for Tensorflow worker services when submitting model training
-
-
-
tf.worker.services.memory
-
-
Submarine submits model resources for Tensorflow worker services when submitting model training
-
-
-
tf.worker.services.num
-
-
Number of Tensorflow worker services used by Submarine to submit model distributed training
-
-
-
yarn.webapp.http.address
-
http://hadoop:8088
-
YARN web ui address
-
-
-
zeppelin.interpreter.rpc.portRange
-
29914
-
You need to export this port in the SUBMARINE_INTERPRETER_DOCKER_IMAGE configuration image. RPC communication for Zeppelin Server and Submarine interpreter containers
-
-
-
zeppelin.ipython.grpc.message_size
-
33554432
-
Message size setting for IPython grpc in Submarine interpreter container
-
-
-
zeppelin.ipython.launch.timeout
-
30000
-
IPython execution timeout setting in Submarine interpreter container
-
-
-
zeppelin.python
-
python
-
Execution path of python in Submarine interpreter container
-
-
-
zeppelin.python.maxResult
-
10000
-
The maximum number of python execution results returned from the Submarine interpreter container
-
-
-
zeppelin.python.useIPython
-
false
-
IPython is currently not supported and must be false
-
-
-
zeppelin.submarine.auth.type
-
simple/kerberos
-
Has Hadoop turned on kerberos authentication?
-
-
-
-### Docker images
-
-The docker images file is stored in the `zeppelin/scripts/docker/submarine` directory.
-
-1. submarine interpreter cpu version
-
-2. submarine interpreter gpu version
-
-3. tensorflow 1.10 & hadoop 3.1.2 cpu version
-
-4. tensorflow 1.10 & hadoop 3.1.2 gpu version
-
-
-## Change Log
-
-**0.1.0** _(Zeppelin 0.9.0)_ :
-
-* Support distributed or standolone tensorflow model training.
-* Support submarine interpreter running local.
-* Support submarine interpreter running YARN.
-* Support Docker on YARN-3.3.0, Plan compatible with lower versions of yarn.
-
-## Bugs & Contacts
-
-+ **Submarine interpreter BUG**
- If you encounter a bug for this interpreter, please create a sub **JIRA** ticket on [ZEPPELIN-3856](https://issues.apache.org/jira/browse/ZEPPELIN-3856).
-+ **Submarine Running problem**
- If you encounter a problem for Submarine runtime, please create a **ISSUE** on [hadoop-submarine-ecosystem](https://github.com/hadoopsubmarine/hadoop-submarine-ecosystem).
-+ **YARN Submarine BUG**
- If you encounter a bug for Yarn Submarine, please create a **JIRA** ticket on [SUBMARINE](https://issues.apache.org/jira/browse/SUBMARINE).
-
-## Dependency
-
-1. **YARN**
- Submarine currently need to run on Hadoop 3.3+
-
- + The hadoop version of the hadoop submarine team git repository is periodically submitted to the code repository of the hadoop.
- + The version of the git repository for the hadoop submarine team will be faster than the hadoop version release cycle.
- + You can use the hadoop version of the hadoop submarine team git repository.
-
-2. **Submarine runtime environment**
- you can use Submarine-installer https://github.com/hadoopsubmarine, Deploy Docker and network environments.
-
-## More
-
-**Hadoop Submarine Project**: https://hadoop.apache.org/submarine
-**Youtube Submarine Channel**: https://www.youtube.com/channel/UC4JBt8Y8VJ0BW0IM9YpdCyQ
\ No newline at end of file
diff --git a/docs/quickstart/docker.md b/docs/quickstart/docker.md
index 0c6a478ff12..17e6229d7bd 100644
--- a/docs/quickstart/docker.md
+++ b/docs/quickstart/docker.md
@@ -19,9 +19,9 @@ limitations under the License.
-->
{% include JB/setup %}
-# Zeppelin interpreter on Docker
+# Zeppelin Interpreter on Docker
-Zeppelin service runs on local server. zeppelin is able to run the interpreter in the docker container, Isolating the operating environment of the interpreter through the docker container. Zeppelin can be easily used without having to install python, spark, etc. on the local node.
+The Zeppelin service runs on the local server. Zeppelin is able to run interpreters in docker containers, isolating the operating environment of each interpreter through the docker container. This way Zeppelin can be used without having to install python, spark, etc. on the local node.
Key benefits are
@@ -55,6 +55,15 @@ vi `/etc/docker/daemon.json`, Add `tcp://0.0.0.0:2375` to the `hosts` configurat
`hosts` property reference: https://docs.docker.com/engine/reference/commandline/dockerd/
+#### Security warning
+
+Making the Docker daemon available over TCP is potentially dangerous: as you
+can read [here](https://docs.docker.com/engine/security/#docker-daemon-attack-surface),
+the docker daemon typically has broad privileges, so only trusted users should
+have access to it. If you expose the daemon over TCP, you must use firewalling
+to make sure only trusted users can access the port. This also includes making
+sure the interpreter docker containers that are started by Zeppelin do not have
+access to this port.
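+
+As an illustration only (not part of the official setup), host-level firewall rules along these lines can restrict the daemon port to a single trusted host; the address 10.0.0.5 is an assumption you must adapt, and the rules need root privileges:
+
+```bash
+# allow only the trusted host to reach the Docker daemon port, drop everything else
+iptables -A INPUT -p tcp --dport 2375 -s 10.0.0.5 -j ACCEPT
+iptables -A INPUT -p tcp --dport 2375 -j DROP
+```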
## Quickstart
@@ -79,7 +88,7 @@ vi `/etc/docker/daemon.json`, Add `tcp://0.0.0.0:2375` to the `hosts` configurat
Set to the same time zone as the zeppelin server, keeping the time zone in the interpreter docker container the same as the server. E.g, `"America/New_York"` or `"Asia/Shanghai"`
```bash
- export DOCKER_TIME_ZONE="America/New_York"
+ export ZEPPELIN_DOCKER_TIME_ZONE="America/New_York"
```
@@ -140,7 +149,6 @@ Zeppelin service runs on local server, it auto configure itself to use `DockerIn
- Keytab file configured in the interpreter properties
- zeppelin.shell.keytab.location
- spark.yarn.keytab
- - submarine.hadoop.keytab
- zeppelin.jdbc.keytab.location
- zeppelin.server.kerberos.keytab
diff --git a/docs/quickstart/flink_with_zeppelin.md b/docs/quickstart/flink_with_zeppelin.md
new file mode 100644
index 00000000000..70f7970b997
--- /dev/null
+++ b/docs/quickstart/flink_with_zeppelin.md
@@ -0,0 +1,42 @@
+---
+layout: page
+title: "Flink with Zeppelin"
+description: ""
+group: quickstart
+---
+
+{% include JB/setup %}
+
+# Flink support in Zeppelin
+
+
+
+
+
+For a brief overview of Apache Flink fundamentals with Apache Zeppelin, see the following guide:
+
+- **built-in** Apache Flink integration.
+- With [Flink Scala Shell](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/repls/scala_shell/), [PyFlink Shell](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/repls/python_shell/) and [Flink SQL](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/dev/table/sql/overview/)
+- Inject ExecutionEnvironment, StreamExecutionEnvironment, BatchTableEnvironment, StreamTableEnvironment.
+- Canceling job and displaying its progress
+- Supports different modes: local, remote, yarn, yarn-application
+- Dependency management
+- Streaming Visualization
+
+
+
+For further information about Flink support in Zeppelin, please check
+
+- [Flink Interpreter](../interpreter/flink.html)
diff --git a/docs/quickstart/install.md b/docs/quickstart/install.md
index aa14d9ffa99..c4e57692aa8 100644
--- a/docs/quickstart/install.md
+++ b/docs/quickstart/install.md
@@ -35,8 +35,8 @@ Apache Zeppelin officially supports and is tested on the following environments:
Value
-
OpenJDK or Oracle JDK
-
1.8 (151+) (set JAVA_HOME)
+
Java
+
JDK 11 (set JAVA_HOME)
OS
@@ -50,7 +50,7 @@ Two binary packages are available on the [download page](http://zeppelin.apache.
- **all interpreter package**: unpack it in a directory of your choice and you're ready to go.
- **net-install interpreter package**: only spark, python, markdown and shell interpreter included. Unpack and follow [install additional interpreters](../usage/interpreter/installation.html) to install other interpreters. If you're unsure, just run `./bin/install-interpreter.sh --all` and install all interpreters.
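+
+For example, a possible way to add just a few more interpreters after unpacking the net-install package (the interpreter name is illustrative):
+
+```bash
+# install selected interpreters instead of all of them
+./bin/install-interpreter.sh --name jdbc
+```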
-
+
### Building Zeppelin from source
Follow the instructions [How to Build](../setup/basics/how_to_build.html), If you want to build from source instead of using binary package.
@@ -67,9 +67,11 @@ bin/zeppelin-daemon.sh start
After Zeppelin has started successfully, go to [http://localhost:8080](http://localhost:8080) with your web browser.
-By default Zeppelin is listening at `127.0.0.1:8080`, so you can't access it when it is deployed in another remote machine.
+By default Zeppelin is listening at `127.0.0.1:8080`, so you can't access it when it is deployed on another remote machine.
To access a remote Zeppelin, you need to change `zeppelin.server.addr` to `0.0.0.0` in `conf/zeppelin-site.xml`.
+Check the log file at `ZEPPELIN_HOME/logs/zeppelin-server-*.log` if you cannot open Zeppelin.
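+
+For example, a quick way to inspect the most recent log entries (assuming a default installation layout and that `ZEPPELIN_HOME` is set):
+
+```bash
+tail -n 200 "$ZEPPELIN_HOME"/logs/zeppelin-server-*.log
+```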
+
#### Stopping Zeppelin
```
@@ -84,15 +86,27 @@ Make sure that [docker](https://www.docker.com/community-edition) is installed i
Use this command to launch Apache Zeppelin in a container.
```bash
-docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.9.0
+docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.10.0
```
+
To persist `logs` and `notebook` directories, use the [volume](https://docs.docker.com/engine/reference/commandline/run/#mount-volume--v-read-only) option for docker container.
```bash
-docker run -p 8080:8080 --rm -v $PWD/logs:/logs -v $PWD/notebook:/notebook \
+docker run -u $(id -u) -p 8080:8080 --rm -v $PWD/logs:/logs -v $PWD/notebook:/notebook \
-e ZEPPELIN_LOG_DIR='/logs' -e ZEPPELIN_NOTEBOOK_DIR='/notebook' \
- --name zeppelin apache/zeppelin:0.9.0
+ --name zeppelin apache/zeppelin:0.10.0
+```
+
+`-u $(id -u)` is to make sure you have the permission to write logs and notebooks.
+
+Many interpreters require additional dependencies, e.g. the Spark interpreter requires a Spark binary distribution
+and the Flink interpreter requires a Flink binary distribution. You can also mount them via docker volumes, e.g.
+
+```bash
+docker run -u $(id -u) -p 8080:8080 --rm -v /mnt/disk1/notebook:/notebook \
+-v /usr/lib/spark-current:/opt/spark -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink \
+-e SPARK_HOME=/opt/spark -e ZEPPELIN_NOTEBOOK_DIR='/notebook' --name zeppelin apache/zeppelin:0.10.0
```
If you have trouble accessing `localhost:8080` in the browser, Please clear browser cache.
@@ -146,13 +160,15 @@ Congratulations, you have successfully installed Apache Zeppelin! Here are a few
#### New to Apache Zeppelin...
* For an in-depth overview, head to [Explore Zeppelin UI](../quickstart/explore_ui.html).
- * And then, try run [Tutorial Notebook](http://localhost:8080/#/notebook/2A94M5J1Z) in your Zeppelin.
+ * And then, try running the Tutorial Notebooks shipped with your Zeppelin distribution.
* And see how to change [configurations](../setup/operation/configuration.html) like port number, etc.
-#### Spark, Python, SQL, and more
+#### Spark, Flink, SQL, Python, R and more
* [Spark support in Zeppelin](./spark_with_zeppelin.html), to know more about deep integration with [Apache Spark](http://spark.apache.org/).
+ * [Flink support in Zeppelin](./flink_with_zeppelin.html), to know more about deep integration with [Apache Flink](http://flink.apache.org/).
* [SQL support in Zeppelin](./sql_with_zeppelin.html) for SQL support
* [Python support in Zeppelin](./python_with_zeppelin.html), for Matplotlib, Pandas, Conda/Docker integration.
+ * [R support in Zeppelin](./r_with_zeppelin.html)
* [All Available Interpreters](../#available-interpreters)
#### Multi-user support ...
diff --git a/docs/quickstart/kubernetes.md b/docs/quickstart/kubernetes.md
index 1c0b99af72e..470614f2f04 100644
--- a/docs/quickstart/kubernetes.md
+++ b/docs/quickstart/kubernetes.md
@@ -34,10 +34,10 @@ Key benefits are
- Zeppelin >= 0.9.0 docker image
- Spark >= 2.4.0 docker image (in case of using Spark Interpreter)
- - A running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
+ - A running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
- [Kubernetes DNS](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/) configured in your cluster
- Enough cpu and memory in your Kubernetes cluster. We recommend 4CPUs, 6g of memory to be able to start Spark Interpreter with few executors.
-
+
- If you're using [minikube](https://kubernetes.io/docs/setup/minikube/), check your cluster capacity (`kubectl describe node`) and increase if necessary
```
@@ -46,38 +46,77 @@ Key benefits are
$ minikube config set memory
$ minikube start
$ minikube config view
- ```
+ ```
## Quickstart
-Get `zeppelin-server.yaml` from github repository or find it from Zeppelin distribution package.
+Let's first clone the Zeppelin repository from GitHub:
+
+```sh
+git clone https://github.com/apache/zeppelin.git
+cd zeppelin
+# you can check out to your desired version/branch
+# git checkout tags/v0.10.1
+# just make sure you check the version inside "./pom.xml"
+```
+
+Now we are going to create the `zeppelin-distribution` image. This may take some time and this image will be used as a base for the upcoming required images:
+```sh
+docker build -t zeppelin-distribution:latest -f ./Dockerfile .
```
-# Get it from Zeppelin distribution package.
-$ ls /k8s/zeppelin-server.yaml
-# or download it from github
-$ curl -s -O https://raw.githubusercontent.com/apache/zeppelin/master/k8s/zeppelin-server.yaml
+Next, we will build our `zeppelin-server` image:
+
+```sh
+cd scripts/docker/zeppelin-server
+# Looking at the "./pom.xml" we can see the version is 0.12.0-SNAPSHOT
+# Let's set the correct version in our Dockerfile:
+# vi Dockerfile
+# ARG version="0.12.0-SNAPSHOT"
+# Once you saved the Dockerfile with the correct version we can build our image:
+docker build -t zeppelin-server:0.12.0-SNAPSHOT -f ./Dockerfile .
```
-Start zeppelin on kubernetes cluster,
+The last image we build is `zeppelin-interpreter`:
+
+```sh
+cd scripts/docker/zeppelin-interpreter
+docker build -t zeppelin-interpreter:0.12.0-SNAPSHOT -f ./Dockerfile .
+```
+
+So we should now have the following images:
+
+```sh
+# sudo if you are on Linux and Docker requires root
+$ docker images
+REPOSITORY TAG IMAGE ID CREATED SIZE
+zeppelin-interpreter 0.12.0-SNAPSHOT 4f77fe989eed 3 minutes ago 622MB
+zeppelin-server 0.12.0-SNAPSHOT 4f77fe989eed 3 minutes ago 622MB
+zeppelin-distribution latest bd2fb4b321d2 40 minutes ago 1.27GB
```
+
+Reminder: please adjust the image names in `zeppelin-server.yaml` to the images you just built.
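+
+For example, you can locate the image references to edit like this (the manifest lives under `k8s/` in the repository; the exact field values depend on your build):
+
+```sh
+grep -n "image:" k8s/zeppelin-server.yaml
+# then point the listed image fields at zeppelin-server:0.12.0-SNAPSHOT / zeppelin-interpreter:0.12.0-SNAPSHOT
+```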
+
+Start zeppelin on Kubernetes cluster,
+
+```sh
kubectl apply -f zeppelin-server.yaml
```
Port forward Zeppelin server port,
-
-```
+
+```sh
kubectl port-forward zeppelin-server 8080:80
```
and browse [localhost:8080](http://localhost:8080).
-Try run some paragraphs and see each interpreter is running as a Pod (using `kubectl get pods`), instead of a local process.
+Try running some paragraphs and see if each interpreter is running as a Pod (using `kubectl get pods`), instead of a local process.
-To shutdown,
+To shut down,
-```
+```sh
kubectl delete -f zeppelin-server.yaml
```
@@ -104,7 +143,7 @@ Create note and configure executor number (default 1)
```
%spark.conf
spark.executor.instances 5
-```
+```
And then start your spark interpreter
@@ -114,7 +153,7 @@ sc.parallelize(1 to 100).count
...
```
While `spark.master` property of SparkInterpreter starts with `k8s://` (default `k8s://https://kubernetes.default.svc` when Zeppelin started using zeppelin-server.yaml), Spark executors will be automatically created in your Kubernetes cluster.
-Spark UI is accessible by clicking `SPARK JOB` on the Paragraph.
+Spark UI is accessible by clicking `SPARK JOB` on the Paragraph.
Check [here](https://spark.apache.org/docs/latest/running-on-kubernetes.html) to know more about Running Spark on Kubernetes.
@@ -124,13 +163,13 @@ Check [here](https://spark.apache.org/docs/latest/running-on-kubernetes.html) to
To build your own Zeppelin image, first build Zeppelin project with `-Pbuild-distr` flag.
```
-$ mvn package -DskipTests -Pbuild-distr
+$ ./mvnw package -DskipTests -Pbuild-distr
```
Binary package will be created under `zeppelin-distribution/target` directory. Move created package file under `scripts/docker/zeppelin/bin/` directory.
```
-$ mv zeppelin-distribution/target/zeppelin-*.tar.gz scripts/docker/zeppelin/bin/
+$ mv zeppelin-distribution/target/zeppelin-*-bin.tgz scripts/docker/zeppelin/bin/
```
`scripts/docker/zeppelin/bin/Dockerfile` downloads package from internet. Modify the file to add package from filesystem.
@@ -155,7 +194,7 @@ Then build docker image.
```
# configure docker env, if you're using minikube
-$ eval $(minikube docker-env)
+$ eval $(minikube docker-env)
# change directory
$ cd scripts/docker/zeppelin/bin/
@@ -245,9 +284,11 @@ to customize,
4. Run a paragraph will create an interpreter using modified yaml files.
The interpreter pod can also be customized through the interpreter settings. Here are some of the properties:
+
| Property Name | Default Value | Description |
| ----- | ----- | ----- |
-| `zeppelin.k8s.namespace` | `default` | The Kubernetes namespace to use. |
+| `zeppelin.k8s.interpreter.namespace` | `default` | Specify the namespace of the current interpreter. Users can set different namespaces for different interpreters. In order to minimize permissions, the interpreter pod can only be created in the `default` namespace by default. If users need to create an interpreter pod in other namespaces, they need to add the corresponding `rolebinding` in `k8s/zeppelin-server.yaml`.|
+| `zeppelin.k8s.interpreter.serviceAccount` | `default` | The Kubernetes service account to use. |
| `zeppelin.k8s.interpreter.container.image` | `apache/zeppelin:` | The interpreter image to use. |
| `zeppelin.k8s.interpreter.cores` | (optional) | The number of cpu cores to use. |
| `zeppelin.k8s.interpreter.memory` | (optional) | The memory to use, e.g., `1g`. |
@@ -256,6 +297,9 @@ The interpreter pod can also be customized through the interpreter settings. Her
| `zeppelin.k8s.interpreter.imagePullSecrets` | (optional) | Set the comma-separated list of Kubernetes secrets while pulling images, e.g., `mysecret1,mysecret2` |
| `zeppelin.k8s.interpreter.container.imagePullPolicy` | (optional) | Set the pull policy of the interpreter image, e.g., `Always` |
| `zeppelin.k8s.spark.container.imagePullPolicy` | (optional) | Set the pull policy of the spark image, e.g., `Always` |
+| `zeppelin.spark.uiWebUrl` | `//{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}}` | The URL for user to access Spark UI. The default value is a [jinjava](https://github.com/HubSpot/jinjava) template that contains three variables. |
+| `zeppelin.k8s.spark.useIngress` | (optional) | If true, the [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) will be created when creating the spark interpreter. So users can access the Spark UI through Ingress. |
+| `zeppelin.k8s.spark.ingress.host` | `{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}}` | If `zeppelin.k8s.spark.useIngress` is `true`, it configures the `host` value of the Ingress. The default value is a [jinjava](https://github.com/HubSpot/jinjava) template that contains three variables. Users can access the Spark UI through a customized `zeppelin.k8s.spark.ingress.host`. |
## Future work
diff --git a/docs/quickstart/python_with_zeppelin.md b/docs/quickstart/python_with_zeppelin.md
index 80237f8c4a6..76b3d5883e3 100644
--- a/docs/quickstart/python_with_zeppelin.md
+++ b/docs/quickstart/python_with_zeppelin.md
@@ -27,16 +27,17 @@ limitations under the License.
The following guides explain how to use Apache Zeppelin that enables you to write in Python:
+- supports [vanilla python](../interpreter/python.html#vanilla-python-interpreter-python) and [ipython](../interpreter/python.html#ipython-interpreter-pythonipython-recommended)
- supports flexible python environments using [conda](../interpreter/python.html#conda), [docker](../interpreter/python.html#docker)
- can query using [PandasSQL](../interpreter/python.html#sql-over-pandas-dataframes)
- also, provides [PySpark](../interpreter/spark.html)
+- [run python interpreter in yarn cluster](../interpreter/python.html#run-python-in-yarn-cluster) with customized conda python environment.
- with [matplotlib integration](../interpreter/python.html#matplotlib-integration)
-- support [ipython](../interpreter/python.html#ipython-interpreter-pythonipython-recommended)
- can create results including **UI widgets** using [Dynamic Form](../interpreter/python.html#using-zeppelin-dynamic-forms)
-For the further information about Spark support in Zeppelin, please check
+For further information about Python support in Zeppelin, please check
- [Python Interpreter](../interpreter/python.html)
diff --git a/docs/quickstart/r_with_zeppelin.md b/docs/quickstart/r_with_zeppelin.md
new file mode 100644
index 00000000000..f9b9feb6596
--- /dev/null
+++ b/docs/quickstart/r_with_zeppelin.md
@@ -0,0 +1,42 @@
+---
+layout: page
+title: "R with Zeppelin"
+description: ""
+group: quickstart
+---
+
+{% include JB/setup %}
+
+# R support in Zeppelin
+
+
+
+
+
+The following guides explain how to use Apache Zeppelin that enables you to write in R:
+
+- Supports [vanilla R](../interpreter/r.html#how-to-use-r-interpreter) and [IRkernel](../interpreter/r.html#how-to-use-r-interpreter)
+- Visualize R dataframe via [ZeppelinContext](../interpreter/r.html#zshow)
+- [Run R interpreter in yarn cluster](../interpreter/r.html#run-r-in-yarn-cluster) with customized conda R environment.
+- [Make R Shiny App](../interpreter/r.html#make-shiny-app-in-zeppelin)
+
+
+
+For further information about R support in Zeppelin, please check
+
+- [R Interpreter](../interpreter/r.html)
+
+
+
diff --git a/docs/quickstart/spark_with_zeppelin.md b/docs/quickstart/spark_with_zeppelin.md
index 6b35beb2af2..7afa608e741 100644
--- a/docs/quickstart/spark_with_zeppelin.md
+++ b/docs/quickstart/spark_with_zeppelin.md
@@ -28,12 +28,13 @@ limitations under the License.
For a brief overview of Apache Spark fundamentals with Apache Zeppelin, see the following guide:
- **built-in** Apache Spark integration.
-- with [SparkSQL](http://spark.apache.org/sql/), [PySpark](https://spark.apache.org/docs/latest/api/python/pyspark.html), [SparkR](https://spark.apache.org/docs/latest/sparkr.html)
-- inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html), [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [SparkSession](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
-- canceling job and displaying its progress
-- supporting [Spark Cluster Mode](../setup/deployment/spark_cluster_mode.html#apache-zeppelin-on-spark-cluster-mode) for external spark clusters
-- supports [different context per user / note](../usage/interpreter/interpreter_binding_mode.html)
-- sharing variables among PySpark, SparkR and Spark through [ZeppelinContext](../interpreter/spark.html#zeppelincontext)
+- With [Spark Scala](https://spark.apache.org/docs/latest/quick-start.html), [SparkSQL](http://spark.apache.org/sql/), [PySpark](https://spark.apache.org/docs/latest/api/python/), [SparkR](https://spark.apache.org/docs/latest/sparkr.html)
+- Inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html), [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [SparkSession](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
+- Canceling job and displaying its progress
+- Supports different modes: local, standalone, yarn(client & cluster), k8s
+- Dependency management
+- Supports [different context per user / note](../usage/interpreter/interpreter_binding_mode.html)
+- Sharing variables among PySpark, SparkR and Spark through [ZeppelinContext](../interpreter/spark.html#zeppelincontext)
- [Livy Interpreter](../interpreter/livy.html)
diff --git a/docs/quickstart/sql_with_zeppelin.md b/docs/quickstart/sql_with_zeppelin.md
index df63ccd3813..d82cd61abf5 100644
--- a/docs/quickstart/sql_with_zeppelin.md
+++ b/docs/quickstart/sql_with_zeppelin.md
@@ -33,16 +33,21 @@ The following guides explain how to use Apache Zeppelin that enables you to writ
* [MariaDB](../interpreter/jdbc.html#mariadb)
* [AWS Redshift](../interpreter/jdbc.html#redshift)
* [Apache Hive](../interpreter/jdbc.html#apache-hive)
+ * [Presto/Trino](../interpreter/jdbc.html#prestotrino)
+ * [Impala](../interpreter/jdbc.html#impala)
+ * [Apache Kyuubi](../interpreter/jdbc.html#apache-kyuubi)
* [Apache Phoenix](../interpreter/jdbc.html#apache-phoenix)
* [Apache Drill](../interpreter/jdbc.html#apache-drill)
* [Apache Tajo](../interpreter/jdbc.html#apache-tajo)
* and so on
- [Spark Interpreter](../interpreter/spark.html) supports [SparkSQL](http://spark.apache.org/sql/)
+- [Flink Interpreter](../interpreter/flink.html) supports [Flink SQL](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/dev/table/sql/overview/)
- [Python Interpreter](../interpreter/python.html) supports [pandasSQL](../interpreter/python.html#sql-over-pandas-dataframes)
- can create query result including **UI widgets** using [Dynamic Form](../usage/dynamic_form/intro.html)
```sql
- %sql
+ %sql
+
select age, count(1) value
from bank
where age < ${maxAge=30}
@@ -56,9 +61,8 @@ For the further information about SQL support in Zeppelin, please check
- [JDBC Interpreter](../interpreter/jdbc.html)
- [Spark Interpreter](../interpreter/spark.html)
+- [Flink Interpreter](../interpreter/flink.html)
- [Python Interpreter](../interpreter/python.html)
-- [IgniteSQL Interpreter](../interpreter/ignite.html#ignite-sql-interpreter) for [Apache Ignite](https://ignite.apache.org/)
-- [Kylin Interpreter](../interpreter/kylin.html) for [Apache Kylin](http://kylin.apache.org/)
diff --git a/docs/quickstart/yarn.md b/docs/quickstart/yarn.md
index 60fb48e2fb2..19808a26b47 100644
--- a/docs/quickstart/yarn.md
+++ b/docs/quickstart/yarn.md
@@ -19,11 +19,11 @@ limitations under the License.
-->
{% include JB/setup %}
-# Zeppelin on Yarn
+# Zeppelin Interpreter on Yarn
-Zeppelin on yarn means to run interpreter process in yarn container. The key benefit is the scalability, you won't run out of memory
+Zeppelin is able to run interpreter processes in yarn containers. The key benefit is scalability: you won't run out of memory
on the zeppelin server host if you run a large number of interpreter processes.
## Prerequisites
@@ -64,6 +64,11 @@ Besides that, you can also specify other properties as following table.
default
yarn queue name
+  <tr>
+    <td>zeppelin.interpreter.yarn.node.label.expression</td>
+    <td></td>
+    <td>yarn node label expression specified for interpreter process</td>
+  </tr>
## Differences with non-yarn interpreter mode (local mode)
diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md
index 7f70c33b5c1..99951a9353a 100644
--- a/docs/setup/basics/how_to_build.md
+++ b/docs/setup/basics/how_to_build.md
@@ -61,7 +61,7 @@ git clone https://github.com/apache/zeppelin.git
You can build Zeppelin with following maven command:
```bash
-mvn clean package -DskipTests [Options]
+./mvnw clean package -DskipTests [Options]
```
Check [build-profiles](#build-profiles) section for further build options.
@@ -79,23 +79,11 @@ You can directly start Zeppelin by running the following command after successfu
### Build profiles
-
-#### Scala profile
-
-To be noticed, this scala profile affect the modules (e.g. cassandra, scalding) that use scala except Spark interpreter (Spark interpreter use other profiles to control its scala version, see the doc below).
-
-Set scala version (default 2.10). Available profiles are
-
-```
--Pscala-2.10
--Pscala-2.11
-```
-
#### Spark Interpreter
-To be noticed, the spark profiles here only affect the embedded mode (no need to specify `SPARK_HOME`) of spark interpreter.
+Note that the spark profiles here only affect the unit tests (no need to specify `SPARK_HOME`) of the Spark interpreter.
Zeppelin doesn't require you to build with different spark to make different versions of spark work in Zeppelin.
-You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 1.6 to 3.0.
+You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Zeppelin supports all versions of Spark from 3.3 to 3.5.
To build with a specific Spark version or scala versions, define one or more of the following profiles and options:
@@ -106,43 +94,34 @@ Set spark major version
Available profiles are
```
--Pspark-3.0
--Pspark-2.4
--Pspark-2.3
--Pspark-2.2
--Pspark-2.1
--Pspark-2.0
--Pspark-1.6
+-Pspark-3.5
+-Pspark-3.4
+-Pspark-3.3
```
minor version can be adjusted by `-Dspark.version=x.x.x`
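+
+For example, a build against a specific Spark patch release might look like this (the exact version number is only an illustration):
+
+```bash
+./mvnw clean package -Pspark-3.5 -Dspark.version=3.5.1 -DskipTests
+```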
##### `-Pspark-scala-[version] (optional)`
-To be noticed, these profiles also only affect the embedded mode (no need to specify `SPARK_HOME`) of Spark interpreter.
-Actually Zeppelin supports all the versions of scala (2.10, 2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`.
+Note that these profiles also only affect the unit tests (no need to specify `SPARK_HOME`) of the Spark interpreter.
+Zeppelin supports all versions of Scala (2.12, 2.13) in the Spark interpreter as long as you specify `SPARK_HOME`.
Available profiles are
```
--Pspark-scala-2.10
--Pspark-scala-2.11
-Pspark-scala-2.12
+-Pspark-scala-2.13
```
-
-If you want to use Spark 3.x in the embedded mode, then you have to specify both profile `spark-3.0` and `spark-scala-2.12`,
-because Spark 3.x doesn't support scala 2.10 and 2.11.
#### Build hadoop with Zeppelin (`-Phadoop[version]`)
To be noticed, hadoop profiles only affect Zeppelin server, it doesn't affect any interpreter.
Zeppelin server use hadoop in some cases, such as using hdfs as notebook storage. You can check this [page](./hadoop_integration.html) for more details about how to configure hadoop in Zeppelin.
-Set hadoop major version (default hadoop2).
+Set hadoop major version (default hadoop3).
Available profiles are
```
--Phadoop2
-Phadoop3
```
@@ -163,29 +142,18 @@ Build examples under zeppelin-examples directory
Here are some examples with several options:
```bash
-# build with spark-3.0, spark-scala-2.12
-mvn clean package -Pspark-3.0 -Pspark-scala-2.12 -DskipTests
-
-# build with spark-2.4, spark-scala-2.11
-mvn clean package -Pspark-2.4 -Pspark-scala-2.11 -DskipTests
+# build with spark-3.3, spark-scala-2.12
+./mvnw clean package -Pspark-3.3 -Pspark-scala-2.12 -DskipTests
-# build with spark-1.6, spark-scala-2.10
-mvn clean package -Pspark-1.6 -Pspark-scala-2.10 -DskipTests
+# build with spark-3.4, spark-scala-2.13
+./mvnw clean package -Pspark-3.4 -Pspark-scala-2.13 -DskipTests
-# build with CDH
-mvn clean package -Pspark-1.6 -Pspark-scala-2.10 -Dhadoop.version=2.6.0-cdh5.5.0 -Pvendor-repo -DskipTests
```
Ignite Interpreter
```bash
-mvn clean package -Dignite.version=1.9.0 -DskipTests
-```
-
-Scalding Interpreter
-
-```bash
-mvn clean package -Pscalding -DskipTests
+./mvnw clean package -Dignite.version=1.9.0 -DskipTests
```
### Optional configurations
@@ -204,7 +172,7 @@ spark.bin.download.url # default http://d3kbcqa49mib13.cloudfront.net/${spark.ar
Py4J package
```bash
-python.py4j.version # default 0.9.2
+python.py4j.version # default 0.10.9.7
pypi.repo.url # default https://pypi.python.org/packages
python.py4j.repo.folder # default /64/5c/01e13b68e8caafece40d549f232c9b5677ad1016071a48d04cc3895acaa3
```
@@ -218,7 +186,7 @@ Frontend Maven Plugin configurations
```
plugin.frontend.nodeDownloadRoot # default https://nodejs.org/dist/
-plugin.frontend.npmDownloadRoot # default http://registry.npmjs.org/npm/-/
+plugin.frontend.npmDownloadRoot # default https://registry.npmjs.org/npm/-/
plugin.frontend.yarnDownloadRoot # default https://github.com/yarnpkg/yarn/releases/download/
```
@@ -239,23 +207,11 @@ sudo apt-get install r-base-dev
sudo apt-get install r-cran-evaluate
```
-
-
-### Install maven
-
-```bash
-wget http://www.eu.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
-sudo tar -zxf apache-maven-3.6.3-bin.tar.gz -C /usr/local/
-sudo ln -s /usr/local/apache-maven-3.6.3/bin/mvn /usr/local/bin/mvn
-```
-
_Notes:_
- Ensure node is installed by running `node --version`
- - Ensure maven is running version 3.6.3 or higher with `mvn -version`
+ - Ensure maven is running version 3.6.3 or higher with `./mvnw -version`
- Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxMetaspaceSize=512m"`
-
-
## Proxy setting (optional)
If you're behind the proxy, you'll need to configure maven and npm to pass through it.
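+
+As a rough illustration (host and port are placeholders; maven additionally reads proxy settings from its `settings.xml`, which is not shown here):
+
+```bash
+export http_proxy=http://proxy.example.com:3128
+export https_proxy=http://proxy.example.com:3128
+npm config set proxy http://proxy.example.com:3128
+npm config set https-proxy http://proxy.example.com:3128
+```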
@@ -325,16 +281,16 @@ _Notes:_
To package the final distribution including the compressed archive, run:
```sh
-mvn clean package -Pbuild-distr
+./mvnw clean package -Pbuild-distr
```
To build a distribution with specific profiles, run:
```sh
-mvn clean package -Pbuild-distr -Pspark-2.4
+./mvnw clean package -Pbuild-distr -Pspark-3.4
```
-The profiles `-Pspark-2.4` can be adjusted if you wish to build to a specific spark versions.
+The profile `-Pspark-3.4` can be adjusted if you wish to build for a specific Spark version.
The archive is generated under _`zeppelin-distribution/target`_ directory
diff --git a/docs/setup/deployment/cdh.md b/docs/setup/deployment/cdh.md
index 20f819b4ee5..485cd34935d 100644
--- a/docs/setup/deployment/cdh.md
+++ b/docs/setup/deployment/cdh.md
@@ -25,7 +25,7 @@ limitations under the License.
### 1. Import Cloudera QuickStart Docker image
->[Cloudera](http://www.cloudera.com/) has officially provided CDH Docker Hub in their own container. Please check [this guide page](http://www.cloudera.com/documentation/enterprise/latest/topics/quickstart_docker_container.html#cloudera_docker_container) for more information.
+>[Cloudera](http://www.cloudera.com/) officially provides a CDH QuickStart Docker image. Please check [this page](https://hub.docker.com/r/cloudera/quickstart/) for more information.
You can import the Docker image by pulling it from Cloudera Docker Hub.
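+
+For example (the image tag comes from the Docker Hub page linked above; adjust it if needed):
+
+```bash
+docker pull cloudera/quickstart:latest
+```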
diff --git a/docs/setup/deployment/flink_and_spark_cluster.md b/docs/setup/deployment/flink_and_spark_cluster.md
index c7936511721..df5df80d9ad 100644
--- a/docs/setup/deployment/flink_and_spark_cluster.md
+++ b/docs/setup/deployment/flink_and_spark_cluster.md
@@ -20,6 +20,8 @@ limitations under the License.
{% include JB/setup %}
+This document is outdated and has not been verified against the latest Zeppelin.
+
# Install with Flink and Spark cluster
@@ -40,8 +42,8 @@ Assuming the minimal install, there are several programs that we will need to in
- git
- openssh-server
-- OpenJDK 7
-- Maven 3.1+
+- OpenJDK 11
+- Maven
For git, openssh-server, and OpenJDK 11 we will be using the apt package manager.
@@ -58,45 +60,10 @@ sudo apt-get install git
sudo apt-get install openssh-server
```
-##### OpenJDK 7
-
-```bash
-sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
-```
-*A note for those using Ubuntu 16.04*: To install `openjdk-7` on Ubuntu 16.04, one must add a repository. [Source](http://askubuntu.com/questions/761127/ubuntu-16-04-and-openjdk-7)
-
-```bash
-sudo add-apt-repository ppa:openjdk-r/ppa
-sudo apt-get update
-sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
-```
-
-##### Maven 3.1+
-Zeppelin requires maven version 3.x. The version available in the repositories at the time of writing is 2.x, so maven must be installed manually.
-
-Purge any existing versions of maven.
-
-```bash
-sudo apt-get purge maven maven2
-```
-
-Download the maven 3.3.9 binary.
-
-```bash
-wget "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz"
-```
-
-Unarchive the binary and move to the `/usr/local` directory.
-
-```bash
-tar -zxvf apache-maven-3.3.9-bin.tar.gz
-sudo mv ./apache-maven-3.3.9 /usr/local
-```
-
-Create symbolic links in `/usr/bin`.
+##### OpenJDK 11
```bash
-sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/bin/mvn
+sudo apt-get install openjdk-11-jdk
```
### Installing Zeppelin
@@ -118,26 +85,23 @@ cd zeppelin
Package Zeppelin.
```bash
-mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.3 -Pscala-2.10
+./mvnw clean package -DskipTests -Pspark-3.5 -Pflink-1.17
```
`-DskipTests` skips build tests- you're not developing (yet), so you don't need to do tests, the clone version *should* build.
-`-Pspark-1.6` tells maven to build a Zeppelin with Spark 1.6. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.
+`-Pspark-3.5` tells maven to build Zeppelin with Spark 3.5. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.
-`-Dflink.version=1.1.3` tells maven specifically to build Zeppelin with Flink version 1.1.3.
+`-Pflink-1.17` tells maven to build Zeppelin with Flink 1.17.
--`-Pscala-2.10` tells maven to build with Scala v2.10.
-
-
-**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 1.6 was the most recent Spark version available.
+**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 3.5 was the most recent Spark version available.
**Note:** On build failures. Having installed Zeppelin close to 30 times now, I will tell you that sometimes the build fails for seemingly no reason.
As long as you didn't edit any code, it is unlikely the build is failing because of something you did. What does tend to happen, is some dependency that maven is trying to download is unreachable. If your build fails on this step here are some tips:
- Don't get discouraged.
- Scroll up and read through the logs. There will be clues there.
-- Retry (that is, run the `mvn clean package -DskipTests -Pspark-1.6` again)
+- Retry (that is, run `./mvnw clean package -DskipTests -Pspark-3.5` again)
- If there were clues that a dependency couldn't be downloaded wait a few hours or even days and retry again. Open source software when compiling is trying to download all of the dependencies it needs, if a server is off-line there is nothing you can do but wait for it to come back.
- Make sure you followed all of the steps carefully.
- Ask the community to help you. Go [here](http://zeppelin.apache.org/community.html) and join the user mailing list. People are there to help you. Make sure to copy and paste the build output (everything that happened in the console) and include that in your message.
@@ -251,16 +215,16 @@ Building from source is recommended where possible, for simplicity in this tuto
To download the Flink Binary use `wget`
```bash
-wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.1.3/flink-1.1.3-bin-hadoop24-scala_2.10.tgz"
-tar -xzvf flink-1.1.3-bin-hadoop24-scala_2.10.tgz
+wget "https://archive.apache.org/dist/flink/flink-1.17.1/flink-1.17.1-bin-scala_2.12.tgz"
+tar -xzvf flink-1.17.1-bin-scala_2.12.tgz
```
-This will download Flink 1.1.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version.
+This will download Flink 1.17.1.
Start the Flink Cluster.
```bash
-flink-1.1.3/bin/start-cluster.sh
+flink-1.17.1/bin/start-cluster.sh
```
###### Building From source
@@ -269,13 +233,13 @@ If you wish to build Flink from source, the following will be instructive. Note
See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions.
-Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.1.3-rc2, and build.
+Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.17.1, and build.
```bash
cd $HOME
git clone https://github.com/apache/flink.git
cd flink
-git checkout release-1.1.3-rc2
+git checkout release-1.17.1
mvn clean install -DskipTests
```
@@ -297,8 +261,8 @@ If no task managers are present, restart the Flink cluster with the following co
(if binaries)
```bash
-flink-1.1.3/bin/stop-cluster.sh
-flink-1.1.3/bin/start-cluster.sh
+flink-1.17.1/bin/stop-cluster.sh
+flink-1.17.1/bin/start-cluster.sh
```
@@ -310,7 +274,7 @@ build-target/bin/start-cluster.sh
```
-##### Spark 1.6 Cluster
+##### Spark Cluster
###### Download Binaries
@@ -321,12 +285,12 @@ Using binaries is also
To download the Spark Binary use `wget`
```bash
-wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz"
-tar -xzvf spark-1.6.3-bin-hadoop2.6.tgz
-mv spark-1.6.3-bin-hadoop2.6 spark
+wget "https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz"
+tar -xzvf spark-3.5.2-bin-hadoop3.tgz
+mv spark-3.5.2-bin-hadoop3 spark
```
-This will download Spark 1.6.3, compatible with Hadoop 2.6. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.6` to your appropriate version.
+This will download Spark 3.5.2, compatible with Hadoop 3. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `3` to your appropriate version.
###### Building From source
@@ -334,21 +298,18 @@ Spark is an extraordinarily large project, which takes considerable time to down
See the [Spark Installation](https://github.com/apache/spark/blob/master/README.md) guide for more detailed instructions.
-Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-1.6, and build.
-**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at
- the time of writing. You are free to check out other version, just make sure you build Zeppelin against the correct version of Spark. However if you use Spark 2.0, the word count example will need to be changed as Spark 2.0 is not compatible with the following examples.
-
+Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-3.5, and build.
```bash
cd $HOME
```
-Clone, check out, and build Spark version 1.6.x.
+Clone, check out, and build Spark version 3.5.x.
```bash
git clone https://github.com/apache/spark.git
cd spark
-git checkout branch-1.6
+git checkout branch-3.5
mvn clean package -DskipTests
```
diff --git a/docs/setup/deployment/virtual_machine.md b/docs/setup/deployment/virtual_machine.md
index a50d1a2ba52..0578b9caa7f 100644
--- a/docs/setup/deployment/virtual_machine.md
+++ b/docs/setup/deployment/virtual_machine.md
@@ -33,14 +33,14 @@ For SparkR users, this script includes several helpful [R Libraries](#r-extras).
### Prerequisites
-This script requires three applications, [Ansible](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip "Ansible"), [Vagrant](http://www.vagrantup.com "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems.
+This script requires three applications, [Ansible](https://www.ansible.com/ "Ansible"), [Vagrant](http://www.vagrantup.com "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems.
## Create a Zeppelin Ready VM
If you are running Windows and don't yet have python installed, [install Python 2.7.x](https://www.python.org/downloads/release/python-2710/) first.
1. Download and Install Vagrant: [Vagrant Downloads](http://www.vagrantup.com/downloads.html)
-2. Install Ansible: [Ansible Python pip install](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip)
+2. Install Ansible: [Ansible Python pip install](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#pip-install)
```bash
sudo easy_install pip
@@ -86,7 +86,6 @@ By default, Vagrant will share your project directory (the directory with the Va
Running the following commands in the guest machine should display these expected versions:
* `node --version` should report *v0.12.7*
-* `mvn --version` should report *Apache Maven 3.3.9* and *Java version: 1.7.0_85*
The virtual machine consists of:
@@ -108,7 +107,7 @@ This assumes you've already cloned the project either on the host machine in the
```bash
cd /zeppelin
-mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests
+./mvnw clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests
./bin/zeppelin-daemon.sh start
```
diff --git a/docs/setup/deployment/yarn_install.md b/docs/setup/deployment/yarn_install.md
index b130272a0c1..994180126e3 100644
--- a/docs/setup/deployment/yarn_install.md
+++ b/docs/setup/deployment/yarn_install.md
@@ -118,7 +118,7 @@ bin/zeppelin-daemon.sh stop
```
## Interpreter
-Zeppelin provides various distributed processing frameworks to process data that ranges from Spark, JDBC, Ignite and Lens to name a few. This document describes to configure JDBC & Spark interpreters.
+Zeppelin provides various distributed processing frameworks to process data, such as Spark and JDBC, to name a few. This document describes how to configure the JDBC & Spark interpreters.
### Hive
Zeppelin supports Hive through JDBC interpreter. You might need the information to use Hive and can find in your hive-site.xml
diff --git a/docs/setup/operation/configuration.md b/docs/setup/operation/configuration.md
index 32d501537cc..e0c769202d0 100644
--- a/docs/setup/operation/configuration.md
+++ b/docs/setup/operation/configuration.md
@@ -53,7 +53,7 @@ Sources descending by priority:
Remote principal repository for interpreter's additional dependency loading
@@ -340,8 +340,8 @@ Sources descending by priority:
<td>ZEPPELIN_INTERPRETER_CONNECT_TIMEOUT</td>
<td>zeppelin.interpreter.connect.timeout</td>
-<td>30000</td>
-<td>Output message from interpreter exceeding the limit will be truncated</td>
+<td>600s</td>
+<td>Interpreter process connect timeout. Default time unit is msec</td>
ZEPPELIN_DEP_LOCALREPO
@@ -463,6 +463,18 @@ Sources descending by priority:
comma-separated list of folder, where cron is allowed
+
+<tr>
+  <td>ZEPPELIN_NOTE_CACHE_THRESHOLD</td>
+  <td>zeppelin.note.cache.threshold</td>
+  <td>50</td>
+  <td>Threshold for the number of notes in the cache before an eviction occurs.</td>
+</tr>
+<tr>
+  <td>ZEPPELIN_NOTEBOOK_VERSIONED_MODE_ENABLE</td>
+  <td>zeppelin.notebook.versioned.mode.enable</td>
+  <td>true</td>
+  <td>Value to enable/disable version control support in Notes.</td>
+</tr>
diff --git a/docs/setup/operation/monitoring.md b/docs/setup/operation/monitoring.md
index 538b115c366..a2fe4434e9e 100644
--- a/docs/setup/operation/monitoring.md
+++ b/docs/setup/operation/monitoring.md
@@ -27,6 +27,7 @@ Apache Zeppelin is using [Micrometer](https://micrometer.io/) - a vendor-neutral
### Prometheus Monitoring
[Prometheus](https://prometheus.io/) is the leading monitoring solution for [Kubernetes](https://kubernetes.io/). The Prometheus endpoint can be activated with the configuration property `zeppelin.metric.enable.prometheus`. The metrics are accessible via the unauthenticated endpoint `/metrics`.
+For [Grafana](https://grafana.com/), a good starting point for a dashboard can be found in our [Github Repository](https://github.com/apache/zeppelin/blob/grafana/examples/dashboard.json).
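Zeppelin exposes its Micrometer metrics through that unauthenticated `/metrics` endpoint. The following minimal Java sketch shows how a Micrometer `PrometheusMeterRegistry` produces the Prometheus text format that such an endpoint serves; it assumes the `micrometer-registry-prometheus` dependency and uses an illustrative counter name, so it is only a sketch of the library usage, not Zeppelin's internal wiring.

```java
import io.micrometer.core.instrument.Counter;
import io.micrometer.prometheus.PrometheusConfig;
import io.micrometer.prometheus.PrometheusMeterRegistry;

public class MetricsSketch {
  public static void main(String[] args) {
    // Vendor-neutral registry backed by Prometheus' exposition format.
    PrometheusMeterRegistry registry = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);

    // Register and bump an illustrative counter (name is hypothetical).
    Counter requests = Counter.builder("example.requests")
        .description("Number of handled requests")
        .register(registry);
    requests.increment();

    // scrape() returns the text a Prometheus server would read from /metrics.
    System.out.println(registry.scrape());
  }
}
```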
### JMX Monitoring
diff --git a/docs/setup/operation/upgrading.md b/docs/setup/operation/upgrading.md
index 4b78ee628e7..673fcac59c7 100644
--- a/docs/setup/operation/upgrading.md
+++ b/docs/setup/operation/upgrading.md
@@ -35,6 +35,9 @@ So, copying `notebook` and `conf` directory should be enough.
## Migration Guide
+### Upgrading from Zeppelin 0.9, 0.10 to 0.11
+ - From 0.11, the `Pegdown` markdown parser was deprecated ([ZEPPELIN-5529](https://issues.apache.org/jira/browse/ZEPPELIN-5529)); `Flexmark` is used instead.
+
### Upgrading from Zeppelin 0.8 to 0.9
- From 0.9, we changed the notes file name structure ([ZEPPELIN-2619](https://issues.apache.org/jira/browse/ZEPPELIN-2619)). So when you upgrading zeppelin to 0.9, you need to upgrade note files. Here's steps you need to follow:
diff --git a/docs/setup/security/shiro_authentication.md b/docs/setup/security/shiro_authentication.md
index 0e3035f74f6..ed99cf813d9 100644
--- a/docs/setup/security/shiro_authentication.md
+++ b/docs/setup/security/shiro_authentication.md
@@ -99,8 +99,8 @@ group1 = *
```
## Configure Realm (optional)
-Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses [IniRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/text/IniRealm.html) (users and groups are configurable in `conf/shiro.ini` file under `[user]` and `[group]` section). You can also leverage Shiro Realms like [JndiLdapRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/ldap/JndiLdapRealm.html), [JdbcRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/jdbc/JdbcRealm.html) or create [our own](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/AuthorizingRealm.html).
-To learn more about Apache Shiro Realm, please check [this documentation](http://shiro.apache.org/realm.html).
+Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses **IniRealm** (users and groups are configurable in the `conf/shiro.ini` file under the `[user]` and `[group]` sections). You can also leverage Shiro Realms like **JndiLdapRealm** and **JdbcRealm**, or create your own by extending **AuthorizingRealm**.
+To learn more about Apache Shiro Realm, please check [this documentation](https://shiro.apache.org/realm.html).
We also provide community custom Realms.
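As a concrete illustration of the "create your own" option above, a custom realm can extend Shiro's `AuthorizingRealm`. The sketch below uses a hard-coded user, password and role purely for illustration; the class name and credentials are hypothetical and not part of Zeppelin. Such a realm would then be registered in `conf/shiro.ini` under `[main]` in the same way as the built-in ones.

```java
import org.apache.shiro.authc.AuthenticationException;
import org.apache.shiro.authc.AuthenticationInfo;
import org.apache.shiro.authc.AuthenticationToken;
import org.apache.shiro.authc.SimpleAuthenticationInfo;
import org.apache.shiro.authc.UsernamePasswordToken;
import org.apache.shiro.authz.AuthorizationInfo;
import org.apache.shiro.authz.SimpleAuthorizationInfo;
import org.apache.shiro.realm.AuthorizingRealm;
import org.apache.shiro.subject.PrincipalCollection;

public class ExampleStaticRealm extends AuthorizingRealm {

  @Override
  protected AuthenticationInfo doGetAuthenticationInfo(AuthenticationToken token)
      throws AuthenticationException {
    UsernamePasswordToken upToken = (UsernamePasswordToken) token;
    // A real realm would look the user up in LDAP, a database, etc.
    // Shiro's default credentials matcher compares the submitted password
    // against the stored credential ("secret" here).
    return new SimpleAuthenticationInfo(upToken.getUsername(), "secret", getName());
  }

  @Override
  protected AuthorizationInfo doGetAuthorizationInfo(PrincipalCollection principals) {
    // Grant a single role; permissions could be added the same way.
    SimpleAuthorizationInfo info = new SimpleAuthorizationInfo();
    info.addRole("user_role");
    return info;
  }
}
```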
@@ -151,28 +151,29 @@ The other more flexible option is to use the LdapRealm. It allows for mapping of
[main]
ldapRealm=org.apache.zeppelin.realm.LdapRealm
-ldapRealm.contextFactory.authenticationMechanism=simple
-ldapRealm.contextFactory.url=ldap://localhost:33389
-ldapRealm.userDnTemplate=uid={0},ou=people,dc=hadoop,dc=apache,dc=org
+ldapRealm.contextFactory.authenticationMechanism = simple
+ldapRealm.contextFactory.url = ldap://localhost:33389
+ldapRealm.userDnTemplate = uid={0},ou=people,dc=hadoop,dc=apache,dc=org
# Ability to set ldap paging Size if needed default is 100
ldapRealm.pagingSize = 200
-ldapRealm.authorizationEnabled=true
-ldapRealm.contextFactory.systemAuthenticationMechanism=simple
-ldapRealm.searchBase=dc=hadoop,dc=apache,dc=org
+ldapRealm.authorizationEnabled = true
+ldapRealm.searchBase = dc=hadoop,dc=apache,dc=org
ldapRealm.userSearchBase = dc=hadoop,dc=apache,dc=org
ldapRealm.groupSearchBase = ou=groups,dc=hadoop,dc=apache,dc=org
-ldapRealm.groupObjectClass=groupofnames
+ldapRealm.groupObjectClass = groupofnames
# Allow userSearchAttribute to be customized
+# If userSearchAttributeName is configured, Zeppelin uses userObjectClass and userSearchAttributeName to search for the actual user DN.
+# Otherwise, memberAttributeValueTemplate is used to construct the user DN.
ldapRealm.userSearchAttributeName = sAMAccountName
-ldapRealm.memberAttribute=member
+ldapRealm.memberAttribute = member
# force usernames returned from ldap to lowercase useful for AD
ldapRealm.userLowerCase = true
# ability set searchScopes subtree (default), one, base
ldapRealm.userSearchScope = subtree;
ldapRealm.groupSearchScope = subtree;
-ldapRealm.memberAttributeValueTemplate=cn={0},ou=people,dc=hadoop,dc=apache,dc=org
-ldapRealm.contextFactory.systemUsername=uid=guest,ou=people,dc=hadoop,dc=apache,dc=org
-ldapRealm.contextFactory.systemPassword=S{ALIAS=ldcSystemPassword}
+ldapRealm.memberAttributeValueTemplate = cn={0},ou=people,dc=hadoop,dc=apache,dc=org
+ldapRealm.contextFactory.systemUsername = uid=guest,ou=people,dc=hadoop,dc=apache,dc=org
+ldapRealm.contextFactory.systemPassword = S{ALIAS=ldcSystemPassword}
# enable support for nested groups using the LDAP_MATCHING_RULE_IN_CHAIN operator
ldapRealm.groupSearchEnableMatchingRuleInChain = true
# optional mapping from physical groups to logical application roles
@@ -180,7 +181,7 @@ ldapRealm.rolesByGroup = LDN_USERS: user_role, NYK_USERS: user_role, HKG_USERS:
# optional list of roles that are allowed to authenticate. Incase not present all groups are allowed to authenticate (login).
# This changes nothing for url specific permissions that will continue to work as specified in [urls].
ldapRealm.allowedRolesForAuthentication = admin_role,user_role
-ldapRealm.permissionsByRole= user_role = *:ToDoItemsJdo:*:*, *:ToDoItem:*:*; admin_role = *
+ldapRealm.permissionsByRole = user_role = *:ToDoItemsJdo:*:*, *:ToDoItem:*:*; admin_role = *
securityManager.sessionManager = $sessionManager
securityManager.realms = $ldapRealm
```
@@ -199,8 +200,8 @@ ldapRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/conf/zeppeli
### PAM
[PAM](https://en.wikipedia.org/wiki/Pluggable_authentication_module) authentication support allows the reuse of existing authentication
-moduls on the host where Zeppelin is running. On a typical system modules are configured per service for example sshd, passwd, etc. under `/etc/pam.d/`. You can
-either reuse one of these services or create your own for Zeppelin. Activiting PAM authentication requires two parameters:
+modules on the host where Zeppelin is running. On a typical system, modules are configured per service (for example sshd, passwd, etc.) under `/etc/pam.d/`. You can
+either reuse one of these services or create your own for Zeppelin. Activating PAM authentication requires two parameters:
1. realm: The Shiro realm being used
2. service: The service configured under `/etc/pam.d/` to be used. The name here needs to be the same as the file name under `/etc/pam.d/`
@@ -210,24 +211,11 @@ either reuse one of these services or create your own for Zeppelin. Activiting P
pamRealm.service=sshd
```
-### ZeppelinHub
-[ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronize your Apache Zeppelin notebooks and enables you to collaborate easily.
-
-To enable login with your ZeppelinHub credential, apply the following change in `conf/shiro.ini` under `[main]` section.
-
-```
-### A sample for configuring ZeppelinHub Realm
-zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
-## Url of ZeppelinHub
-zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
-securityManager.realms = $zeppelinHubRealm
-```
-
-> Note: ZeppelinHub is not related to Apache Zeppelin project.
-
### Knox SSO
[KnoxSSO](https://knox.apache.org/books/knox-0-13-0/dev-guide.html#KnoxSSO+Integration) provides an abstraction for integrating any number of authentication systems and SSO solutions and enables participating web applications to scale to those solutions more easily. Without the token exchange capabilities offered by KnoxSSO each component UI would need to integrate with each desired solution on its own.
+When Knox SSO is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will be used internally to determine the group memberships of the user who is trying to log in. Role-based access permissions can be set based on groups as seen by Hadoop.
+
To enable this, apply the following change in `conf/shiro.ini` under `[main]` section.
```
@@ -249,7 +237,7 @@ authc = org.apache.zeppelin.realm.jwt.KnoxAuthenticationFilter
### HTTP SPNEGO Authentication
HTTP SPNEGO (Simple and Protected GSS-API NEGOtiation) is the standard way to support Kerberos Ticket based user authentication for Web Services. Based on [Apache Hadoop Auth](https://hadoop.apache.org/docs/current/hadoop-auth/index.html), Zeppelin supports ability to authenticate users by accepting and validating their Kerberos Ticket.
-When HTTP SPNEGO Authentication is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will used internally to determine group membership of user who is trying to log in. Role-based access permission can be set based on groups as seen by Hadoop.
+When HTTP SPNEGO Authentication is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will be used internally to determine the group memberships of the user who is trying to log in. Role-based access permissions can be set based on groups as seen by Hadoop.
To enable this, apply the following change in `conf/shiro.ini` under `[main]` section.
@@ -266,7 +254,9 @@ authc = org.apache.zeppelin.realm.kerberos.KerberosAuthenticationFilter
```
For above configuration to work, user need to do some more configurations outside Zeppelin.
-1). A valid SPNEGO keytab should be available on the Zeppelin node and should be readable by 'zeppelin' user. If there is a SPNEGO keytab already available (because of other Hadoop service), it can be reused here and no need to generate a new keytab. An example of working SPNEGO keytab could be:
+1. A valid SPNEGO keytab should be available on the Zeppelin node and should be readable by the 'zeppelin' user. If a SPNEGO keytab is already available (because of another Hadoop service), it can be reused here without generating a new keytab.
+An example of a working SPNEGO keytab:
+
```
$ klist -kt /etc/security/keytabs/spnego.service.keytab
Keytab name: FILE:/etc/security/keytabs/spnego.service.keytab
@@ -277,16 +267,19 @@ KVNO Timestamp Principal
2 11/26/2018 16:58:38 HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM
2 11/26/2018 16:58:38 HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM
```
-and the keytab permission should be: (VERY IMPORTANT to not to set this to 777 or readable by all !!!):
+
+Ensure that the keytab permissions are sufficiently strict while still being readable by the 'zeppelin' user:
+
```
$ ls -l /etc/security/keytabs/spnego.service.keytab
-r--r-----. 1 root hadoop 346 Nov 26 16:58 /etc/security/keytabs/spnego.service.keytab
```
-Above 'zeppelin' user happens to be member of 'hadoop' group.
-2). A secret signature file must be present on Zeppelin node (readable to 'zeppelin' user). This file contains the random binary numbers which is used to sign 'hadoop.auth' cookie, generated during SPNEGO exchange. If such a file is already generated and available on the Zeppelin node, it should be used rather than generating a new file.
+Note that for the above example, the 'zeppelin' user can read the keytab because they are a member of the 'hadoop' group.
+2. A secret signature file must be present on the Zeppelin node, readable by the 'zeppelin' user. This file contains random binary data that is used to sign the 'hadoop.auth' cookie generated during the SPNEGO exchange. If such a file has already been generated and is available on the Zeppelin node, it should be used rather than generating a new file.
Commands to generate a secret signature file (if required):
+
```
dd if=/dev/urandom of=/etc/security/http_secret bs=1024 count=1
chown hdfs:hadoop /etc/security/http_secret
diff --git a/docs/setup/storage/configuration_storage.md b/docs/setup/storage/configuration_storage.md
new file mode 100644
index 00000000000..3a5bbff9dfb
--- /dev/null
+++ b/docs/setup/storage/configuration_storage.md
@@ -0,0 +1,65 @@
+---
+layout: page
+title: "Configuration Storage for Apache Zeppelin"
+description: "Configuration Storage for Apache Zeppelin"
+group: setup/storage
+---
+
+{% include JB/setup %}
+
+# Configuration Storage for Apache Zeppelin
+
+
+
+## Overview
+Zeppelin has a lot of configuration, which is stored in the following files:
+- `interpreter.json` (This file contains all the interpreter setting info)
+- `notebook-authorization.json` (This file contains all the note authorization info)
+- `credential.json` (This file contains the credential info)
+
+## Configuration Storage in hadoop compatible file system
+
+Set following properties in `zeppelin-site.xml`:
+```xml
+<property>
+  <name>zeppelin.config.storage.class</name>
+  <value>org.apache.zeppelin.storage.FileSystemConfigStorage</value>
+  <description>configuration persistence layer implementation</description>
+</property>
+<property>
+  <name>zeppelin.config.fs.dir</name>
+  <value></value>
+  <description>path on the hadoop compatible file system</description>
+</property>
+```
+Also specify `HADOOP_CONF_DIR` in `zeppelin-env.sh` so that Zeppelin can find the right hadoop configuration files.
+
+If your hadoop cluster is kerberized, then you also need to specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal`.
+
+
+## Configuration Storage in local file system
+By default, Zeppelin stores its configuration on the local file system.
+```xml
+<property>
+  <name>zeppelin.config.storage.class</name>
+  <value>org.apache.zeppelin.storage.LocalConfigStorage</value>
+  <description>configuration persistence layer implementation</description>
+</property>
+<property>
+  <name>zeppelin.config.fs.dir</name>
+  <value></value>
+  <description>path on local file system</description>
+</property>
+```
\ No newline at end of file
diff --git a/docs/setup/storage/storage.md b/docs/setup/storage/notebook_storage.md
similarity index 92%
rename from docs/setup/storage/storage.md
rename to docs/setup/storage/notebook_storage.md
index f53fa6b8df4..e7a5b26ccc0 100644
--- a/docs/setup/storage/storage.md
+++ b/docs/setup/storage/notebook_storage.md
@@ -1,7 +1,7 @@
---
layout: page
title: "Notebook Storage for Apache Zeppelin"
-description: Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations."
+description: "Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations."
group: setup/storage
---
-
-```
-
-or set the environment variable in the file **zeppelin-env.sh**:
-
-```bash
-export ZEPPELIN_NOTEBOOK_STORAGE="org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo"
-```
-
-Secondly, you need to set the environment variables in the file **zeppelin-env.sh**:
-
-```bash
-export ZEPPELINHUB_API_TOKEN=ZeppelinHub token
-export ZEPPELINHUB_API_ADDRESS=address of ZeppelinHub service (e.g. https://www.zeppelinhub.com)
-```
-
-You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).
-
-
## Notebook Storage in MongoDB
Using `MongoNotebookRepo`, you can store your notebook in [MongoDB](https://www.mongodb.com/).
diff --git a/docs/usage/display_system/basic.md b/docs/usage/display_system/basic.md
index 5080fffcbca..f0a4dde9654 100644
--- a/docs/usage/display_system/basic.md
+++ b/docs/usage/display_system/basic.md
@@ -1,7 +1,7 @@
---
layout: page
title: "Basic Display System in Apache Zeppelin"
-description: "There are 3 basic display systems in Apache Zeppelin. By default, Zeppelin prints interpreter responce as a plain text using text display system. With %html directive, Zeppelin treats your output as HTML. You can also simply use %table display system to leverage Zeppelin's built in visualization."
+description: "There are several display systems available in Apache Zeppelin. By default, Zeppelin prints interpreter response as a plain text using %text display system. However, display systems for showing HTML, tables, markdown or even graph visualizations are also available."
group: usage/display_system
---
+ 1.3
@@ -81,6 +83,16 @@
${unirest.version}
+    <dependency>
+      <groupId>org.json</groupId>
+      <artifactId>json</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-params</artifactId>
+      <scope>test</scope>
+    </dependency>
@@ -88,9 +100,6 @@
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-plugin
@@ -100,9 +109,6 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
-
- false
-
diff --git a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
index 45b37c4ebc6..69f5b05679b 100644
--- a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
+++ b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
@@ -22,6 +22,11 @@
import com.google.gson.JsonObject;
import org.apache.commons.lang3.StringUtils;
+import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient;
+import org.apache.zeppelin.elasticsearch.client.ElasticsearchClientType;
+import org.apache.zeppelin.elasticsearch.client.ElasticsearchClientTypeBuilder;
+import org.apache.zeppelin.elasticsearch.client.HttpBasedClient;
+import org.apache.zeppelin.elasticsearch.client.TransportBasedClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
@@ -55,19 +60,18 @@
import org.apache.zeppelin.elasticsearch.action.ActionResponse;
import org.apache.zeppelin.elasticsearch.action.AggWrapper;
import org.apache.zeppelin.elasticsearch.action.HitWrapper;
-import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient;
-import org.apache.zeppelin.elasticsearch.client.HttpBasedClient;
-import org.apache.zeppelin.elasticsearch.client.TransportBasedClient;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
+import static org.apache.zeppelin.elasticsearch.client.ElasticsearchClientType.TRANSPORT;
+
/**
* Elasticsearch Interpreter for Zeppelin.
*/
public class ElasticsearchInterpreter extends Interpreter {
- private static Logger logger = LoggerFactory.getLogger(ElasticsearchInterpreter.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchInterpreter.class);
private static final String HELP = "Elasticsearch interpreter:\n"
+ "General format: ///
diff --git a/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java b/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java
index eea5650f00d..8275215efa5 100644
--- a/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java
+++ b/file/src/main/java/org/apache/zeppelin/file/FileInterpreter.java
@@ -44,7 +44,7 @@
*
*/
public abstract class FileInterpreter extends Interpreter {
- Logger logger = LoggerFactory.getLogger(FileInterpreter.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(FileInterpreter.class);
String currentDir = null;
CommandArgs args = null;
@@ -108,13 +108,13 @@ protected String getNewPath(String argument){
// Handle the command handling uniformly across all file systems
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
- logger.info("Run File command '" + cmd + "'");
+ LOGGER.info("Run File command '" + cmd + "'");
args = new CommandArgs(cmd);
args.parseArgs();
if (args.command == null) {
- logger.info("Error: No command");
+ LOGGER.info("Error: No command");
return new InterpreterResult(Code.ERROR, Type.TEXT, "No command");
}
@@ -133,7 +133,7 @@ public InterpreterResult interpret(String cmd, InterpreterContext contextInterpr
String results = listAll(newPath);
return new InterpreterResult(Code.SUCCESS, Type.TEXT, results);
} catch (Exception e) {
- logger.error("Error listing files in path " + newPath, e);
+ LOGGER.error("Error listing files in path " + newPath, e);
return new InterpreterResult(Code.ERROR, Type.TEXT, e.getMessage());
}
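The `LOGGER` constant introduced above follows the usual SLF4J idiom. For reference, SLF4J also supports parameterized messages, which avoid building the string when the log level is disabled; the small self-contained illustration below is not part of the patch itself.

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingSketch {
  private static final Logger LOGGER = LoggerFactory.getLogger(LoggingSketch.class);

  public static void main(String[] args) {
    String cmd = "ls -l /";
    // The {} placeholder is only substituted if INFO logging is enabled.
    LOGGER.info("Run File command '{}'", cmd);
  }
}
```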
diff --git a/file/src/main/java/org/apache/zeppelin/file/HDFSFileInterpreter.java b/file/src/main/java/org/apache/zeppelin/file/HDFSFileInterpreter.java
index 2a13b3c1457..7b0b1bec934 100644
--- a/file/src/main/java/org/apache/zeppelin/file/HDFSFileInterpreter.java
+++ b/file/src/main/java/org/apache/zeppelin/file/HDFSFileInterpreter.java
@@ -33,11 +33,14 @@
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* HDFS implementation of File interpreter for Zeppelin.
*/
public class HDFSFileInterpreter extends FileInterpreter {
+ private static final Logger LOGGER = LoggerFactory.getLogger(HDFSFileInterpreter.class);
static final String HDFS_URL = "hdfs.url";
static final String HDFS_USER = "hdfs.user";
static final String HDFS_MAXLENGTH = "hdfs.maxlength";
@@ -50,7 +53,7 @@ public void prepare() {
String userName = getProperty(HDFS_USER);
String hdfsUrl = getProperty(HDFS_URL);
int i = Integer.parseInt(getProperty(HDFS_MAXLENGTH));
- cmd = new HDFSCommand(hdfsUrl, userName, logger, i);
+ cmd = new HDFSCommand(hdfsUrl, userName, LOGGER, i);
gson = new Gson();
}
@@ -66,9 +69,9 @@ public HDFSFileInterpreter(Properties property){
*/
public class OneFileStatus {
public long accessTime;
- public int blockSize;
+ public long blockSize;
public int childrenNum;
- public int fileId;
+ public long fileId;
public String group;
public long length;
public long modificationTime;
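Widening `blockSize` and `fileId` to `long` matters because WebHDFS can return values larger than `Integer.MAX_VALUE`. The following self-contained sketch (hypothetical class names; the sample values are taken from the updated test data in this patch) shows Gson mapping such JSON onto `long` fields without overflow.

```java
import com.google.gson.Gson;

public class WebHdfsLongFieldsSketch {
  // Subset of the WebHDFS FileStatus fields, declared wide enough for real clusters.
  static class FileStatusSketch {
    long accessTime;
    long blockSize; // e.g. 2147483648 (2 GiB) does not fit in an int
    int childrenNum;
    long fileId;    // e.g. 4947954640 also exceeds Integer.MAX_VALUE
    long length;
  }

  public static void main(String[] args) {
    String json = "{\"accessTime\":1441253097489,\"blockSize\":2147483648,"
        + "\"childrenNum\":0,\"fileId\":4947954640,\"length\":1645}";
    FileStatusSketch status = new Gson().fromJson(json, FileStatusSketch.class);
    System.out.println(status.blockSize + " / " + status.fileId);
  }
}
```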
@@ -132,10 +135,10 @@ public class AllFileStatus {
private void testConnection() {
try {
if (isDirectory("/")) {
- logger.info("Successfully created WebHDFS connection");
+ LOGGER.info("Successfully created WebHDFS connection");
}
} catch (Exception e) {
- logger.error("testConnection: Cannot open WebHDFS connection. Bad URL: " + "/", e);
+ LOGGER.error("testConnection: Cannot open WebHDFS connection. Bad URL: " + "/", e);
exceptionOnConnect = e;
}
}
@@ -212,7 +215,7 @@ public String listFile(String filePath) {
return listOne(filePath, sfs.fileStatus);
}
} catch (Exception e) {
- logger.error("listFile: " + filePath, e);
+ LOGGER.error("listFile: " + filePath, e);
}
return "No such File or directory";
}
@@ -246,7 +249,7 @@ public String listAll(String path) throws InterpreterException {
return listFile(path);
}
} catch (Exception e) {
- logger.error("listall: listDir " + path, e);
+ LOGGER.error("listall: listDir " + path, e);
throw new InterpreterException("Could not find file or directory:\t" + path);
}
}
@@ -264,7 +267,7 @@ public boolean isDirectory(String path) {
return sfs.fileStatus.type.equals("DIRECTORY");
}
} catch (Exception e) {
- logger.error("IsDirectory: " + path, e);
+ LOGGER.error("IsDirectory: " + path, e);
return false;
}
return ret;
@@ -273,7 +276,7 @@ public boolean isDirectory(String path) {
@Override
public List completion(String buf, int cursor,
InterpreterContext interpreterContext) {
- logger.info("Completion request at position\t" + cursor + " in string " + buf);
+ LOGGER.info("Completion request at position\t" + cursor + " in string " + buf);
final List suggestions = new ArrayList<>();
if (StringUtils.isEmpty(buf)) {
suggestions.add(new InterpreterCompletion("ls", "ls", CompletionType.command.name()));
@@ -337,11 +340,11 @@ public List completion(String buf, int cursor,
}
}
} catch (Exception e) {
- logger.error("listall: listDir " + globalPath, e);
+ LOGGER.error("listall: listDir " + globalPath, e);
return null;
}
} else {
- logger.info("path is not a directory. No values suggested.");
+ LOGGER.info("path is not a directory. No values suggested.");
}
//Error in string.
diff --git a/file/src/test/java/org/apache/zeppelin/file/HDFSFileInterpreterTest.java b/file/src/test/java/org/apache/zeppelin/file/HDFSFileInterpreterTest.java
index aa698866f24..515babecd3f 100644
--- a/file/src/test/java/org/apache/zeppelin/file/HDFSFileInterpreterTest.java
+++ b/file/src/test/java/org/apache/zeppelin/file/HDFSFileInterpreterTest.java
@@ -18,14 +18,13 @@
package org.apache.zeppelin.file;
-import static org.junit.Assert.assertNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
import com.google.gson.Gson;
-import junit.framework.TestCase;
-
-import org.junit.Test;
import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.HashMap;
@@ -35,13 +34,14 @@
import org.apache.zeppelin.completer.CompletionType;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
+import org.junit.jupiter.api.Test;
/**
* Tests Interpreter by running pre-determined commands against mock file system.
*/
-public class HDFSFileInterpreterTest extends TestCase {
+class HDFSFileInterpreterTest {
@Test
- public void testMaxLength() {
+ void testMaxLength() {
HDFSFileInterpreter t = new MockHDFSFileInterpreter(new Properties());
t.open();
InterpreterResult result = t.interpret("ls -l /", null);
@@ -61,7 +61,7 @@ public void testMaxLength() {
}
@Test
- public void test() {
+ void test() {
HDFSFileInterpreter t = new MockHDFSFileInterpreter(new Properties());
t.open();
@@ -72,58 +72,58 @@ public void test() {
// 2. paths (. and ..) are correctly handled
// 3. flags and arguments to commands are correctly handled
InterpreterResult result1 = t.interpret("ls -l /", null);
- assertEquals(result1.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result1.message().get(0).getType());
InterpreterResult result2 = t.interpret("ls -l /./user/..", null);
- assertEquals(result2.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result2.message().get(0).getType());
assertEquals(result1.message().get(0).getData(), result2.message().get(0).getData());
// Ensure you can do cd and after that the ls uses current directory correctly
InterpreterResult result3 = t.interpret("cd user", null);
- assertEquals(result3.message().get(0).getType(), InterpreterResult.Type.TEXT);
- assertEquals(result3.message().get(0).getData(), "OK");
+ assertEquals(InterpreterResult.Type.TEXT, result3.message().get(0).getType());
+ assertEquals("OK", result3.message().get(0).getData());
InterpreterResult result4 = t.interpret("ls", null);
- assertEquals(result4.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result4.message().get(0).getType());
InterpreterResult result5 = t.interpret("ls /user", null);
- assertEquals(result5.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result5.message().get(0).getType());
assertEquals(result4.message().get(0).getData(), result5.message().get(0).getData());
// Ensure pwd works correctly
InterpreterResult result6 = t.interpret("pwd", null);
- assertEquals(result6.message().get(0).getType(), InterpreterResult.Type.TEXT);
- assertEquals(result6.message().get(0).getData(), "/user");
+ assertEquals(InterpreterResult.Type.TEXT, result6.message().get(0).getType());
+ assertEquals("/user", result6.message().get(0).getData());
// Move a couple of levels and check we're in the right place
InterpreterResult result7 = t.interpret("cd ../mr-history/done", null);
- assertEquals(result7.message().get(0).getType(), InterpreterResult.Type.TEXT);
- assertEquals(result7.message().get(0).getData(), "OK");
+ assertEquals(InterpreterResult.Type.TEXT, result7.message().get(0).getType());
+ assertEquals("OK", result7.message().get(0).getData());
InterpreterResult result8 = t.interpret("ls -l ", null);
- assertEquals(result8.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result8.message().get(0).getType());
InterpreterResult result9 = t.interpret("ls -l /mr-history/done", null);
- assertEquals(result9.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result9.message().get(0).getType());
assertEquals(result8.message().get(0).getData(), result9.message().get(0).getData());
InterpreterResult result10 = t.interpret("cd ../..", null);
- assertEquals(result10.message().get(0).getType(), InterpreterResult.Type.TEXT);
- assertEquals(result7.message().get(0).getData(), "OK");
+ assertEquals(InterpreterResult.Type.TEXT, result10.message().get(0).getType());
+ assertEquals("OK", result7.message().get(0).getData());
InterpreterResult result11 = t.interpret("ls -l ", null);
- assertEquals(result11.message().get(0).getType(), InterpreterResult.Type.TEXT);
+ assertEquals(InterpreterResult.Type.TEXT, result11.message().get(0).getType());
// we should be back to first result after all this navigation
assertEquals(result1.message().get(0).getData(), result11.message().get(0).getData());
// auto completion test
- List expectedResultOne = Arrays.asList(
+ List expectedResultOne = Arrays.asList(
new InterpreterCompletion("ls", "ls", CompletionType.command.name()));
- List expectedResultTwo = Arrays.asList(
+ List expectedResultTwo = Arrays.asList(
new InterpreterCompletion("pwd", "pwd", CompletionType.command.name()));
List resultOne = t.completion("l", 0, null);
List resultTwo = t.completion("p", 0, null);
@@ -141,7 +141,7 @@ public void test() {
class MockFileSystem {
HashMap mfs = new HashMap<>();
static final String FILE_STATUSES =
- "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16389," +
+ "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":4947954640," +
"\"group\":\"hadoop\",\"length\":0,\"modificationTime\":1438548219672," +
"\"owner\":\"yarn\",\"pathSuffix\":\"app-logs\",\"permission\":\"777\"," +
"\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
@@ -184,7 +184,7 @@ void addListStatusData() {
);
mfs.put("/tmp?op=LISTSTATUS",
"{\"FileStatuses\":{\"FileStatus\":[\n" +
- " {\"accessTime\":1441253097489,\"blockSize\":134217728,\"childrenNum\":0," +
+ " {\"accessTime\":1441253097489,\"blockSize\":2147483648,\"childrenNum\":0," +
"\"fileId\":16400,\"group\":\"hdfs\",\"length\":1645," +
"\"modificationTime\":1441253097517,\"owner\":\"hdfs\"," +
"\"pathSuffix\":\"ida8c06540_date040315\",\"permission\":\"755\"," +
@@ -275,12 +275,14 @@ public String runCommand(Op op, String path, Arg[] args) throws Exception {
* Mock Interpreter - uses Mock HDFS command.
*/
class MockHDFSFileInterpreter extends HDFSFileInterpreter {
+ private static final Logger LOGGER = LoggerFactory.getLogger(MockHDFSFileInterpreter.class);
+
@Override
public void prepare() {
// Run commands against mock File System instead of WebHDFS
int i = Integer.parseInt(getProperty(HDFS_MAXLENGTH) == null ? "1000"
: getProperty(HDFS_MAXLENGTH));
- cmd = new MockHDFSCommand("", "", logger, i);
+ cmd = new MockHDFSCommand("", "", LOGGER, i);
gson = new Gson();
}
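The test changes above follow the JUnit 5 (Jupiter) conventions: package-private test classes and methods, `org.junit.jupiter` imports instead of `junit.framework.TestCase`, and `assertEquals(expected, actual)` with the expected value first. A minimal standalone example of the same style (the test class is hypothetical):

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class ArgumentOrderTest {

  @Test
  void expectedValueComesFirst() {
    String actual = "OK";
    // JUnit 5 assertions take the expected value first, then the actual one.
    assertEquals("OK", actual);
  }
}
```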
diff --git a/flink-cmd/pom.xml b/flink-cmd/pom.xml
index fae79c85d4b..4f4456f8dc7 100644
--- a/flink-cmd/pom.xml
+++ b/flink-cmd/pom.xml
@@ -23,7 +23,7 @@
zeppelin-interpreter-parentorg.apache.zeppelin
- 0.10.0-SNAPSHOT
+ 0.12.0-SNAPSHOT../zeppelin-interpreter-parent/pom.xml
@@ -44,14 +44,7 @@
org.apache.hadoop
- hadoop-common
- ${hadoop.version}
- provided
-
-
-
- org.apache.hadoop
- hadoop-yarn-client
+ hadoop-client-runtime${hadoop.version}provided
@@ -62,18 +55,12 @@
maven-enforcer-plugin
-
- maven-dependency-plugin
- maven-resources-pluginmaven-shade-plugin
-
- maven-checkstyle-plugin
-
diff --git a/flink/README.md b/flink/README.md
index e8e7dd946d4..3b120bf3140 100644
--- a/flink/README.md
+++ b/flink/README.md
@@ -6,47 +6,24 @@ This is the doc for Zeppelin developers who want to work on flink interpreter.
### Project Structure
-Flink interpreter is more complex than other interpreter (such as jdbc, shell). Currently it has following 8 modules
+The Flink interpreter is more complex than other interpreters (such as jdbc and shell).
+Currently, it has the following modules clustered into two groups:
+
* flink-shims
-* flink1.10-shims
-* flink1.11-shims
-* flink1.12-shims
-* flink1.13-shims
-* flink-scala-parent
-* flink-scala-2.11
+* flink1.15-shims
+* flink1.16-shims
+* flink1.17-shims
+
* flink-scala-2.12
-The first 5 modules are to adapt different flink versions because there're some api changes between different versions of flink.
+The modules in the first group adapt to different flink versions, because there are some api changes between different versions of flink.
`flink-shims` is parent module for other shims modules.
At runtime Flink interpreter will load the FlinkShims based on the current flink versions (See `FlinkShims#loadShims`).
-The remaining 3 modules are to adapt different scala versions (Apache Flink supports 2 scala versions: 2.11 & 2.12).
-`flink-scala-parent` is a parent module for `flink-scala-2.11` and `flink-scala-2.12`. It contains common code for both `flink-scala-2.11` and `flink-scala-2.12`.
-There's symlink folder `flink-scala-parent` under `flink-scala-2.11` and `flink-scala-2.12`.
-When you run maven command to build flink interpreter, the source code in `flink-scala-parent` won't be compiled directly, instead
-they will be compiled against different scala versions when building `flink-scala-2.11` & `flink-scala-2.12`. (See `build-helper-maven-plugin` in `pom.xml`)
-Both `flink-scala-2.11` and `flink-scala-2.12` build a flink interpreter jar and `FlinkInterpreterLauncher` in `zeppelin-plugins/launcher/flink` will choose the right jar based
-on the scala version of flink.
+The modules in the second group adapt to different scala versions. Since Flink 1.15, Flink only supports Scala 2.12, so there is only one such module: `flink-scala-2.12`. The shim selection pattern is sketched below.
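The shim mechanism described above boils down to choosing an implementation class by the detected Flink version and loading it reflectively. A simplified, hypothetical sketch of that pattern follows; the class names are illustrative and do not match Zeppelin's actual `FlinkShims` API.

```java
public abstract class VersionShims {

  /** Pick a shims implementation for the given Flink version and instantiate it. */
  public static VersionShims load(String flinkVersion) throws Exception {
    final String className;
    if (flinkVersion.startsWith("1.17")) {
      className = "org.example.shims.Flink117Shims";
    } else if (flinkVersion.startsWith("1.16")) {
      className = "org.example.shims.Flink116Shims";
    } else if (flinkVersion.startsWith("1.15")) {
      className = "org.example.shims.Flink115Shims";
    } else {
      throw new IllegalArgumentException("Unsupported Flink version: " + flinkVersion);
    }
    return (VersionShims) Class.forName(className)
        .getDeclaredConstructor()
        .newInstance();
  }

  /** Version-specific behaviour lives in the concrete shims classes. */
  public abstract Object createTableEnvironment();
}
```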
### Work in IDE
-Because of the complex project structure of flink interpreter, we need to do more configuration to make it work in IDE.
-Here we take Intellij as an example (other IDE should be similar).
-
-The key point is that we can only make flink interpreter work with one scala version at the same time in IDE.
-So we have to disable the other module when working with one specific scala version module.
-
-#### Make it work with scala-2.11
-
-1. Exclude the source code folder (java/scala) of `flink-scala-parent` (Right click these folder -> Mark directory As -> Excluded)
-2. Include the source code folder (java/scala) of `flink/flink-scala-2.11/flink-scala-parent` (Right click these folder -> Mark directory As -> Source root)
-
-#### Make it work with scala-2.12
-
-1. Exclude the source code folder (java/scala) of `flink-scala-parent` (Right click these folder -> Mark directory As -> Excluded)
-2. Include the source code folder (java/scala) of `flink/flink-scala-2.12/flink-scala-parent` (Right click these folder -> Mark directory As -> Source root)
-
-
#### How to run unit test in IDE
Take `FlinkInterpreterTest` as an example, you need to specify environment variables `FLINK_HOME`, `FLINK_CONF_DIR`, `ZEPPELIN_HOME`.
diff --git a/flink/flink-scala-2.11/flink-scala-parent b/flink/flink-scala-2.11/flink-scala-parent
deleted file mode 120000
index 3dfa859ba33..00000000000
--- a/flink/flink-scala-2.11/flink-scala-parent
+++ /dev/null
@@ -1 +0,0 @@
-../flink-scala-parent
\ No newline at end of file
diff --git a/flink/flink-scala-2.11/pom.xml b/flink/flink-scala-2.11/pom.xml
deleted file mode 100644
index cf8231a843f..00000000000
--- a/flink/flink-scala-2.11/pom.xml
+++ /dev/null
@@ -1,96 +0,0 @@
-
-
-
-
- org.apache.zeppelin
- flink-scala-parent
- 0.10.0-SNAPSHOT
- ../flink-scala-parent/pom.xml
-
-
- 4.0.0
- org.apache.zeppelin
- flink-scala-2.11
- 0.10.0-SNAPSHOT
- jar
- Zeppelin: Flink Interpreter Scala_2.11
-
-
- 2.11.12
- 2.11
- ${flink.scala.version}
-
-
-
-
-
- org.codehaus.mojo
- build-helper-maven-plugin
-
-
-
- maven-resources-plugin
-
-
-
- org.codehaus.mojo
- build-helper-maven-plugin
-
-
-
- net.alchim31.maven
- scala-maven-plugin
-
-
-
- com.googlecode.maven-download-plugin
- download-maven-plugin
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
-
-
- org.scalatest
- scalatest-maven-plugin
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
-
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
-
-
diff --git a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkExprTyper.scala b/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkExprTyper.scala
deleted file mode 100644
index d61bcbcc4f2..00000000000
--- a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkExprTyper.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.zeppelin.flink
-
-import scala.tools.nsc.interpreter.{ExprTyper, IR}
-
-trait FlinkExprTyper extends ExprTyper {
-
- import repl._
- import global.{Import => _, reporter => _, _}
- import naming.freshInternalVarName
-
- def doInterpret(code: String): IR.Result = {
- // interpret/interpretSynthetic may change the phase,
- // which would have unintended effects on types.
- val savedPhase = phase
- try interpretSynthetic(code) finally phase = savedPhase
- }
-
- override def symbolOfLine(code: String): Symbol = {
- def asExpr(): Symbol = {
- val name = freshInternalVarName()
- // Typing it with a lazy val would give us the right type, but runs
- // into compiler bugs with things like existentials, so we compile it
- // behind a def and strip the NullaryMethodType which wraps the expr.
- val line = "def " + name + " = " + code
-
- doInterpret(line) match {
- case IR.Success =>
- val sym0 = symbolOfTerm(name)
- // drop NullaryMethodType
- sym0.cloneSymbol setInfo exitingTyper(sym0.tpe_*.finalResultType)
- case _ => NoSymbol
- }
- }
-
- def asDefn(): Symbol = {
- val old = repl.definedSymbolList.toSet
-
- doInterpret(code) match {
- case IR.Success =>
- repl.definedSymbolList filterNot old match {
- case Nil => NoSymbol
- case sym :: Nil => sym
- case syms => NoSymbol.newOverloaded(NoPrefix, syms)
- }
- case _ => NoSymbol
- }
- }
-
- def asError(): Symbol = {
- doInterpret(code)
- NoSymbol
- }
-
- beSilentDuring(asExpr()) orElse beSilentDuring(asDefn()) orElse asError()
- }
-
-}
diff --git a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkILoopInterpreter.scala b/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkILoopInterpreter.scala
deleted file mode 100644
index 08cb0c0db12..00000000000
--- a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkILoopInterpreter.scala
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.zeppelin.flink
-
-import scala.collection.mutable
-import scala.tools.nsc.Settings
-import scala.tools.nsc.interpreter._
-
-class FlinkILoopInterpreter(settings: Settings, out: JPrintWriter) extends IMain(settings, out) {
- self =>
-
- override lazy val memberHandlers = new {
- val intp: self.type = self
- } with MemberHandlers {
- import intp.global._
-
- override def chooseHandler(member: intp.global.Tree): MemberHandler = member match {
- case member: Import => new FlinkImportHandler(member)
- case _ => super.chooseHandler(member)
- }
-
- class FlinkImportHandler(imp: Import) extends ImportHandler(imp: Import) {
-
- override def targetType: Type = intp.global.rootMirror.getModuleIfDefined("" + expr) match {
- case NoSymbol => intp.typeOfExpression("" + expr)
- case sym => sym.tpe
- }
-
- private def safeIndexOf(name: Name, s: String): Int = fixIndexOf(name, pos(name, s))
- private def fixIndexOf(name: Name, idx: Int): Int = if (idx == name.length) -1 else idx
- private def pos(name: Name, s: String): Int = {
- var i = name.pos(s.charAt(0), 0)
- val sLen = s.length()
- if (sLen == 1) return i
- while (i + sLen <= name.length) {
- var j = 1
- while (s.charAt(j) == name.charAt(i + j)) {
- j += 1
- if (j == sLen) return i
- }
- i = name.pos(s.charAt(0), i + 1)
- }
- name.length
- }
-
- private def isFlattenedSymbol(sym: Symbol): Boolean =
- sym.owner.isPackageClass &&
- sym.name.containsName(nme.NAME_JOIN_STRING) &&
- sym.owner.info.member(sym.name.take(
- safeIndexOf(sym.name, nme.NAME_JOIN_STRING))) != NoSymbol
-
- private def importableTargetMembers =
- importableMembers(exitingTyper(targetType)).filterNot(isFlattenedSymbol).toList
-
- def isIndividualImport(s: ImportSelector): Boolean =
- s.name != nme.WILDCARD && s.rename != nme.WILDCARD
- def isWildcardImport(s: ImportSelector): Boolean =
- s.name == nme.WILDCARD
-
- // non-wildcard imports
- private def individualSelectors = selectors filter isIndividualImport
-
- override val importsWildcard: Boolean = selectors exists isWildcardImport
-
- lazy val importableSymbolsWithRenames: List[(Symbol, Name)] = {
- val selectorRenameMap =
- individualSelectors.flatMap(x => x.name.bothNames zip x.rename.bothNames).toMap
- importableTargetMembers flatMap (m => selectorRenameMap.get(m.name) map (m -> _))
- }
-
- override lazy val individualSymbols: List[Symbol] = importableSymbolsWithRenames map (_._1)
- override lazy val wildcardSymbols: List[Symbol] =
- if (importsWildcard) importableTargetMembers else Nil
-
- }
-
- }
-
- object expressionTyper extends {
- val repl: FlinkILoopInterpreter.this.type = self
- } with FlinkExprTyper { }
-
- override def symbolOfLine(code: String): global.Symbol =
- expressionTyper.symbolOfLine(code)
-
- override def typeOfExpression(expr: String, silent: Boolean): global.Type =
- expressionTyper.typeOfExpression(expr, silent)
-
-
- import global.Name
- override def importsCode(wanted: Set[Name], wrapper: Request#Wrapper,
- definesClass: Boolean, generousImports: Boolean): ComputedImports = {
-
- import global._
- import definitions.PredefModule
- import memberHandlers._
-
- val header, code, trailingBraces, accessPath = new StringBuilder
- val currentImps = mutable.HashSet[Name]()
- // only emit predef import header if name not resolved in history, loosely
- var predefEscapes = false
-
- /**
- * Narrow down the list of requests from which imports
- * should be taken. Removes requests which cannot contribute
- * useful imports for the specified set of wanted names.
- */
- case class ReqAndHandler(req: Request, handler: MemberHandler)
-
- def reqsToUse: List[ReqAndHandler] = {
- /**
- * Loop through a list of MemberHandlers and select which ones to keep.
- * 'wanted' is the set of names that need to be imported.
- */
- def select(reqs: List[ReqAndHandler], wanted: Set[Name]): List[ReqAndHandler] = {
- // Single symbol imports might be implicits! See bug #1752. Rather than
- // try to finesse this, we will mimic all imports for now.
- def keepHandler(handler: MemberHandler) = handler match {
- // While defining classes in class based mode - implicits are not needed.
- case h: ImportHandler if isClassBased && definesClass =>
- h.importedNames.exists(x => wanted.contains(x))
- case _: ImportHandler => true
- case x if generousImports => x.definesImplicit ||
- (x.definedNames exists (d => wanted.exists(w => d.startsWith(w))))
- case x => x.definesImplicit ||
- (x.definedNames exists wanted)
- }
-
- reqs match {
- case Nil =>
- predefEscapes = wanted contains PredefModule.name ; Nil
- case rh :: rest if !keepHandler(rh.handler) => select(rest, wanted)
- case rh :: rest =>
- import rh.handler._
- val augment = rh match {
- case ReqAndHandler(_, _: ImportHandler) => referencedNames
- case _ => Nil
- }
- val newWanted = wanted ++ augment -- definedNames -- importedNames
- rh :: select(rest, newWanted)
- }
- }
-
- /** Flatten the handlers out and pair each with the original request */
- select(allReqAndHandlers reverseMap { case (r, h) => ReqAndHandler(r, h) }, wanted).reverse
- }
-
- // add code for a new object to hold some imports
- def addWrapper() {
- import nme.{INTERPRETER_IMPORT_WRAPPER => iw}
- code append (wrapper.prewrap format iw)
- trailingBraces append wrapper.postwrap
- accessPath append s".$iw"
- currentImps.clear()
- }
-
- def maybeWrap(names: Name*) = if (names exists currentImps) addWrapper()
-
- def wrapBeforeAndAfter[T](op: => T): T = {
- addWrapper()
- try op finally addWrapper()
- }
-
- // imports from Predef are relocated to the template header to allow hiding.
- def checkHeader(h: ImportHandler) = h.referencedNames contains PredefModule.name
-
- // loop through previous requests, adding imports for each one
- wrapBeforeAndAfter {
- // Reusing a single temporary value when import from a line with multiple definitions.
- val tempValLines = mutable.Set[Int]()
- for (ReqAndHandler(req, handler) <- reqsToUse) {
- val objName = req.lineRep.readPathInstance
- handler match {
- case h: ImportHandler if checkHeader(h) =>
- header.clear()
- header append f"${h.member}%n"
- // If the user entered an import, then just use it; add an import wrapping
- // level if the import might conflict with some other import
- case x: ImportHandler if x.importsWildcard =>
- wrapBeforeAndAfter(code append (x.member + "\n"))
- case x: ImportHandler =>
- maybeWrap(x.importedNames: _*)
- code append (x.member + "\n")
- currentImps ++= x.importedNames
-
- case x if isClassBased =>
- for (sym <- x.definedSymbols) {
- maybeWrap(sym.name)
- x match {
- case _: ClassHandler =>
- code.append(s"import ${objName}${req.accessPath}.`${sym.name}`\n")
- case _ =>
- val valName = s"${req.lineRep.packageName}${req.lineRep.readName}"
- if (!tempValLines.contains(req.lineRep.lineId)) {
- code.append(s"val $valName: ${objName}.type = $objName\n")
- tempValLines += req.lineRep.lineId
- }
- code.append(s"import ${valName}${req.accessPath}.`${sym.name}`\n")
- }
- currentImps += sym.name
- }
- // For other requests, import each defined name.
- // import them explicitly instead of with _, so that
- // ambiguity errors will not be generated. Also, quote
- // the name of the variable, so that we don't need to
- // handle quoting keywords separately.
- case x =>
- for (sym <- x.definedSymbols) {
- maybeWrap(sym.name)
- code append s"import ${x.path}\n"
- currentImps += sym.name
- }
- }
- }
- }
-
- val computedHeader = if (predefEscapes) header.toString else ""
- ComputedImports(computedHeader, code.toString, trailingBraces.toString, accessPath.toString)
- }
-
- private def allReqAndHandlers =
- prevRequestList flatMap (req => req.handlers map (req -> _))
-
-}
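
The deleted importsCode above replays the imports and definitions of every earlier REPL request and, via addWrapper(), nests them in wrapper objects so that a name redefined on a later line shadows the earlier binding instead of triggering an ambiguity error. A minimal, self-contained Scala sketch of the generated shape, assuming the conventional $iw value of INTERPRETER_IMPORT_WRAPPER; the line1/line2 objects below are illustrative stand-ins for the per-request read instances:

// Illustrative stand-ins for two earlier REPL requests that both define `x`.
object line1 { val x = 1 }
object line2 { val x = 2 }

object $iw {                  // outer wrapper opened by wrapBeforeAndAfter()
  import line1.x              // import contributed by the older request
  object $iw {                // addWrapper() opens a deeper level before a clashing import
    import line2.x            // shadows line1.x instead of conflicting with it
    val current = x           // code compiled at this level sees the newest x (== 2)
  }
}
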
diff --git a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkScala211Interpreter.scala b/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkScala211Interpreter.scala
deleted file mode 100644
index 66b74bd84d4..00000000000
--- a/flink/flink-scala-2.11/src/main/scala/org/apache/zeppelin/flink/FlinkScala211Interpreter.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.zeppelin.flink
-
-import java.io.File
-import java.net.URLClassLoader
-import java.util.Properties
-
-import org.apache.zeppelin.interpreter.InterpreterContext
-import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion
-
-import scala.tools.nsc.Settings
-import scala.tools.nsc.interpreter.{IMain, JPrintWriter}
-
-
-class FlinkScala211Interpreter(override val properties: Properties,
- override val flinkScalaClassLoader: URLClassLoader)
- extends FlinkScalaInterpreter(properties, flinkScalaClassLoader) {
-
- override def completion(buf: String,
- cursor: Int,
- context: InterpreterContext): java.util.List[InterpreterCompletion] = {
- val completions = scalaCompletion.completer().complete(buf.substring(0, cursor), cursor).candidates
- .map(e => new InterpreterCompletion(e, e, null))
- scala.collection.JavaConversions.seqAsJavaList(completions)
- }
-
- override def createIMain(settings: Settings, out: JPrintWriter): IMain = new FlinkILoopInterpreter(settings, out)
-
- override def createSettings(): Settings = {
- val settings = new Settings()
- // Don't call settings#embeddedDefaults for scala-2.11, otherwise it could cause weird error
- settings.usejavacp.value = true
- settings.Yreplsync.value = true
- settings.classpath.value = userJars.mkString(File.pathSeparator)
- settings
- }
-}
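
The deleted completion method above hands only the text before the cursor to the Scala completer and wraps every candidate string in an InterpreterCompletion. A self-contained sketch of that contract; the completer call is replaced by a hard-coded candidate list, and the thrift InterpreterCompletion class is stood in by a local case class:

import scala.collection.JavaConverters._

// Local stand-in for org.apache.zeppelin.interpreter.thrift.InterpreterCompletion.
case class Completion(name: String, value: String, meta: String)

def complete(buf: String, cursor: Int): java.util.List[Completion] = {
  val prefix = buf.substring(0, cursor)                 // text after the cursor is ignored
  val candidates =                                      // stand-in for scalaCompletion.completer().complete(...)
    if (prefix.endsWith("exec")) Seq("execute", "executeAsync") else Seq.empty[String]
  candidates.map(c => Completion(c, c, null)).asJava    // Zeppelin consumes a java.util.List
}

complete("senv.exec", "senv.exec".length)               // two candidates for the "exec" prefix
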
diff --git a/flink/flink-scala-2.12/flink-scala-parent b/flink/flink-scala-2.12/flink-scala-parent
deleted file mode 120000
index 3dfa859ba33..00000000000
--- a/flink/flink-scala-2.12/flink-scala-parent
+++ /dev/null
@@ -1 +0,0 @@
-../flink-scala-parent
\ No newline at end of file
diff --git a/flink/flink-scala-2.12/pom.xml b/flink/flink-scala-2.12/pom.xml
index 4e01ec49f86..d711c9cc443 100644
--- a/flink/flink-scala-2.12/pom.xml
+++ b/flink/flink-scala-2.12/pom.xml
@@ -15,44 +15,1189 @@
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
+
+ 4.0.0
+
+ flink-parent
+ org.apache.zeppelin
- flink-scala-parent
- 0.10.0-SNAPSHOT
- ../flink-scala-parent/pom.xml
+ 0.12.0-SNAPSHOT
+ ../pom.xml
- 4.0.0
- org.apache.zeppelin
- flink-scala-2.12
- 0.10.0-SNAPSHOT
- jar
- Zeppelin: Flink Interpreter Scala_2.12
+
+ flink
+ ${flink1.17.version}
+ 2.12.7
+ 2.12
+ ${flink.scala.version}
+ 2.3.7
+ 14.0.1
+ 10.14.2.0
+ 5.3.0
+ 1.15.0
+
+ https://archive.apache.org/dist/flink/flink-${flink.version}/flink-${flink.version}-bin-scala_${flink.scala.binary.version}.tgz
+
+
+
+
+ org.apache.zeppelin
+ flink-shims
+ ${project.version}
+
+
+
+ org.apache.zeppelin
+ flink1.15-shims
+ ${project.version}
+
+
+
+ org.apache.zeppelin
+ flink1.16-shims
+ ${project.version}
+
+
+
+ org.apache.zeppelin
+ flink1.17-shims
+ ${project.version}
+
+
+
+ org.apache.zeppelin
+ zeppelin-python
+ ${project.version}
+
+
+ io.atomix
+ *
+
+
+ com.google.guava
+ guava
+
+
+
+
+
+ ${project.groupId}
+ zeppelin-interpreter
+ ${project.version}
+ provided
+
+
+ io.atomix
+ *
+
+
+ com.google.guava
+ guava
+
+
+ io.grpc
+ *
+
+
+
+
+
+ ${project.groupId}
+ zeppelin-python
+ ${project.version}
+ tests
+ test
+
+
+ io.atomix
+ *
+
+
+ com.google.guava
+ guava
+
+
+ io.grpc
+ *
+
+
+
+
+
+ org.apache.flink
+ flink-core
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-clients
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-yarn
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-shaded-hadoop2
+
+
+ org.apache.hadoop
+ *
+
+
+
+
+
+ org.apache.flink
+ flink-table-api-scala_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-table-api-scala-bridge_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-table-api-java-bridge
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-scala_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-streaming-java
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-streaming-scala_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-java
+ ${flink.version}
+ provided
+
+
+
+
+ org.apache.flink
+ flink-hadoop-compatibility_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+ com.google.code.gson
+ gson
+
+
+
+ org.scala-lang
+ scala-library
+ ${flink.scala.version}
+ provided
+
+
+
+ org.scala-lang
+ scala-compiler
+ ${flink.scala.version}
+ provided
+
+
+
+ org.scala-lang
+ scala-reflect
+ ${flink.scala.version}
+ provided
+
+
+
+ com.mashape.unirest
+ unirest-java
+ 1.4.9
+
+
+ org.json
+ json
+
+
+
+
+
+ org.json
+ json
+
+
+
+ org.apache.flink
+ flink-connector-hive_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+ org.apache.hive
+ hive-metastore
+
+
+ org.apache.hive
+ hive-exec
+
+
+
+
+ org.apache.flink
+ flink-connector-hive_${flink.scala.binary.version}
+ ${flink.version}
+ tests
+ test
+
+
+
+ com.google.guava
+ guava
+ ${hive.guava.version}
+ provided
+
+
+ log4j
+ log4j
+
+
+
+
+
+ org.apache.hive
+ hive-metastore
+ ${hive.version}
+ provided
+
+
+ org.apache.hive
+ hive-shims
+
+
+ javolution
+ javolution
+
+
+ com.google.guava
+ guava
+
+
+ com.google.protobuf
+ protobuf-java
+
+
+ org.apache.hbase
+ hbase-client
+
+
+ commons-lang
+ commons-lang
+
+
+ co.cask.tephra
+ tephra-api
+
+
+ co.cask.tephra
+ tephra-core
+
+
+ co.cask.tephra
+ tephra-hbase-compat-1.0
+
+
+ commons-cli
+ commons-cli
+
+
+ org.apache.thrift
+ libfb303
+
+
+ javax.transaction
+ transaction-api
+
+
+ org.apache.orc
+ orc-core
+
+
+ joda-time
+ joda-time
+
+
+ org.apache.logging.log4j
+ log4j-1.2-api
+
+
+ org.apache.logging.log4j
+ log4j-slf4j-impl
+
+
+ org.apache.ant
+ ant
+
+
+ com.tdunning
+ json
+
+
+ jline
+ jline
+
+
+ org.eclipse.jetty.aggregate
+ jetty-all
+
+
+ org.eclipse.jetty.orbit
+ javax.servlet
+
+
+ org.apache.logging.log4j
+ log4j-web
+
+
+ io.dropwizard.metrics
+ metrics-core
+
+
+ io.dropwizard.metrics
+ metrics-jvm
+
+
+ io.dropwizard.metrics
+ metrics-json
+
+
+ com.github.joshelser
+ dropwizard-metrics-hadoop-metrics2-reporter
+
+
+
+
+ tomcat
+ jasper-compiler
+
+
+ tomcat
+ jasper-runtime
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+
+
+ commons-codec
+ commons-codec
+
+
+ org.apache.avro
+ avro
+
+
+ net.sf.opencsv
+ opencsv
+
+
+ org.apache.parquet
+ parquet-hadoop-bundle
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ org.apache.derby
+ derby
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+ org.apache.hive
+ hive-exec
+ ${hive.version}
+ provided
+
+
+ org.apache.hive
+ hive-vector-code-gen
+
+
+ org.apache.hive
+ hive-llap-tez
+
+
+ org.apache.hive
+ hive-shims
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-httpclient
+ commons-httpclient
+
+
+ org.apache.logging.log4j
+ log4j-slf4j-impl
+
+
+ org.antlr
+ antlr-runtime
+
+
+ org.antlr
+ ST4
+
+
+ org.apache.ant
+ ant
+
+
+ org.apache.commons
+ commons-compress
+
+
+ org.apache.ivy
+ ivy
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ org.apache.curator
+ apache-curator
+
+
+ org.apache.curator
+ curator-framework
+
+
+ org.codehaus.groovy
+ groovy-all
+
+
+ org.apache.calcite
+ calcite-core
+
+
+ org.apache.calcite
+ calcite-druid
+
+
+ org.apache.calcite.avatica
+ avatica
+
+
+ org.apache.calcite
+ calcite-avatica
+
+
+ com.google.code.gson
+ gson
+
+
+ stax
+ stax-api
+
+
+ com.google.guava
+ guava
+
+
+ log4j
+ log4j
+
+
+ log4j
+ apache-log4j-extras
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+ com.klarna
+ hiverunner
+ ${hiverunner.version}
+ test
+
+
+ org.apache.hive
+ hive-serde
+
+
+ org.apache.hive
+ hive-jdbc
+
+
+ org.apache.hive
+ hive-service
+
+
+ org.apache.hive
+ hive-contrib
+
+
+ org.apache.hive
+ hive-exec
+
+
+ org.apache.hive
+ hive-hcatalog-core
+
+
+ org.apache.hive.hcatalog
+ hive-webhcat-java-client
+
+
+ org.apache.tez
+ tez-common
+
+
+
+ jdk.tools
+ jdk.tools
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-auth
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+ hadoop-hdfs
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-common
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-yarn-api
+ org.apache.hadoop
+
+
+ hadoop-yarn-client
+ org.apache.hadoop
+
+
+ hadoop-yarn-common
+ org.apache.hadoop
+
+
+ hadoop-yarn-server-common
+ org.apache.hadoop
+
+
+ hadoop-yarn-server-web-proxy
+ org.apache.hadoop
+
+
+ hadoop-shim
+ org.apache.tez
+
+
+ jms
+ javax.jms
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+
+
+
+ org.apache.hive
+ hive-service
+ ${hive.version}
+ test
+
+
+ org.jamon
+ jamon-runtime
+
+
+ org.apache.hive
+ hive-exec
+
+
+ org.apache.hive
+ hive-metastore
+
+
+ com.google.guava
+ guava
+
+
+
+ jdk.tools
+ jdk.tools
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-auth
+ org.apache.hadoop
+
+
+ hadoop-client
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+ hadoop-hdfs
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ hadoop-yarn-api
+ org.apache.hadoop
+
+
+ hadoop-yarn-common
+ org.apache.hadoop
+
+
+ hadoop-yarn-registry
+ org.apache.hadoop
+
+
+ hadoop-yarn-server-applicationhistoryservice
+ org.apache.hadoop
+
+
+ hadoop-yarn-server-common
+ org.apache.hadoop
+
+
+ hadoop-yarn-server-resourcemanager
+ org.apache.hadoop
+
+
+ hbase-hadoop-compat
+ org.apache.hbase
+
+
+ org.apache.hbase
+ hbase-client
+
+
+ org.apache.hbase
+ hbase-common
+
+
+ org.apache.hbase
+ hbase-server
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+ org.apache.hive.hcatalog
+ hive-hcatalog-core
+ ${hive.version}
+ test
+
+
+ org.jamon
+ jamon-runtime
+
+
+ org.apache.hive
+ hive-exec
+
+
+ com.google.guava
+ guava
+
+
+ hadoop-common
+ org.apache.hadoop
+
+
+ hadoop-archives
+ org.apache.hadoop
+
+
+ hadoop-annotations
+ org.apache.hadoop
+
+
+ hadoop-hdfs
+ org.apache.hadoop
+
+
+ hadoop-mapreduce-client-core
+ org.apache.hadoop
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-resourcemanager
+
+
+ log4j
+ log4j
+
+
+ log4j
+ apache-log4j-extras
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+ org.apache.hive.hcatalog
+ hive-webhcat-java-client
+ ${hive.version}
+ test
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+
+
+ org.jamon
+ jamon-runtime
+
+
+ jdk.tools
+ jdk.tools
+
+
+ jms
+ javax.jms
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+
+ org.apache.derby
+ derby
+ ${derby.version}
+ test
+
+
+
+ org.apache.hive
+ hive-jdbc
+ ${hive.version}
+ test
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-resourcemanager
+
+
+ org.jamon
+ jamon-runtime
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ ch.qos.reload4j
+ reload4j
+
+
+ org.slf4j
+ slf4j-reload4j
+
+
+ org.pentaho
+ pentaho-aggdesigner-algorithm
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ provided
+
+
+
+ org.mockito
+ mockito-core
+ test
+
+
+
+ org.assertj
+ assertj-core
+ test
+
+
+
+ net.jodah
+ concurrentunit
+ 0.4.6
+ test
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ com.lmax
+ disruptor
+ 3.4.4
+ test
+
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ test
+
+
+
+ org.junit.vintage
+ junit-vintage-engine
+ ${junit.jupiter.version}
+ test
+
+
+
+
- org.codehaus.mojo
- build-helper-maven-plugin
+ net.alchim31.maven
+ scala-maven-plugin
+
+
+ eclipse-add-source
+
+ add-source
+
+
+
+ scala-compile-first
+ process-resources
+
+ compile
+
+
+
+ scala-test-compile-first
+ process-test-resources
+
+ testCompile
+
+
+
+
+ ${flink.scala.version}
+
+ -unchecked
+ -deprecation
+ -feature
+ -nobootcp
+
+
+ -Xms1024m
+ -Xmx1024m
+ -XX:MaxMetaspaceSize=${MaxMetaspace}
+
+
+ -source
+ ${java.version}
+ -target
+ ${java.version}
+ -Xlint:all,-serial,-path,-options
+
+
+
+
+
+
+ com.googlecode.maven-download-plugin
+ download-maven-plugin
+
+
+ download-flink-files
+ validate
+
+ wget
+
+
+ 60000
+ 5
+ ${flink.bin.download.url}
+ true
+ ${project.build.directory}
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+ false
+ 1
+ false
+
+ -Xmx5120m -XX:MaxMetaspaceSize=1024m -Dsun.zip.disableMemoryMapping=true
+
+
+
+ ${basedir}/../../
+ ${project.build.directory}/flink-${flink.version}
+ ${project.build.directory}/test-classes
+
+
+
+
+
+
+ org.scalatest
+ scalatest-maven-plugin
+
+
+
+ maven-enforcer-plugin
+ maven-resources-plugin
+
+
+ copy-interpreter-setting
+ package
+
+ resources
+
+
+ ${project.build.directory}/../../../interpreter/${interpreter.name}
+
+
+
- org.codehaus.mojo
- build-helper-maven-plugin
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+
+ *:*
+
+ org/datanucleus/**
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+ org.scala-lang:scala-library
+ org.scala-lang:scala-compiler
+ org.scala-lang:scala-reflect
+ org.apache.flink:*
+
+
+
+
+
+
+ reference.conf
+
+
+
+
+ io.netty
+ org.apache.zeppelin.shaded.io.netty
+
+
+ com.google
+ org.apache.zeppelin.shaded.com.google
+
+
+ ${project.basedir}/../../interpreter/${interpreter.name}/zeppelin-flink-${project.version}-${flink.scala.binary.version}.jar
+
+
+
+ package
+
+ shade
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
@@ -86,11 +1231,107 @@
org.apache.maven.plugins
maven-shade-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
+
+
+
+ flink-115
+
+ ${flink1.15.version}
+ 2.12.7
+ 2.12
+
+
+
+ org.apache.flink
+ flink-runtime
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-table-planner_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-python_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+
+
+
+ flink-116
+
+ ${flink1.16.version}
+ 2.12.7
+ 2.12
+
+
+
+ org.apache.flink
+ flink-runtime
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-table-planner_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-sql-client
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-python
+ ${flink.version}
+ provided
+
+
+
+
+
+ flink-117
+
+ ${flink1.17.version}
+ 2.12.7
+ 2.12
+
+
+
+ org.apache.flink
+ flink-runtime
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-table-planner_${flink.scala.binary.version}
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-sql-client
+ ${flink.version}
+ provided
+
+
+ org.apache.flink
+ flink-python
+ ${flink.version}
+ provided
+
+
+
+
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationExecutionEnvironment.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeExecutionEnvironment.java
similarity index 91%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationExecutionEnvironment.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeExecutionEnvironment.java
index 2cfd0e3bf9d..52ba6fe0333 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationExecutionEnvironment.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeExecutionEnvironment.java
@@ -37,15 +37,15 @@
/**
- * ExecutionEnvironment used for yarn application mode.
+ * ExecutionEnvironment used for application mode.
* Need to add jars of scala shell before submitting jobs.
*/
-public class YarnApplicationExecutionEnvironment extends ExecutionEnvironment {
+public class ApplicationModeExecutionEnvironment extends ExecutionEnvironment {
private FlinkILoop flinkILoop;
private FlinkScalaInterpreter flinkScalaInterpreter;
- public YarnApplicationExecutionEnvironment(PipelineExecutorServiceLoader executorServiceLoader,
+ public ApplicationModeExecutionEnvironment(PipelineExecutorServiceLoader executorServiceLoader,
Configuration configuration,
ClassLoader userClassloader,
FlinkILoop flinkILoop,
@@ -67,6 +67,12 @@ public JobExecutionResult execute() throws Exception {
return super.execute();
}
+ @Override
+ public JobExecutionResult execute(String jobName) throws Exception {
+ updateDependencies();
+ return super.execute(jobName);
+ }
+
private void updateDependencies() throws Exception {
final Configuration configuration = getConfiguration();
checkState(
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationStreamEnvironment.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeStreamEnvironment.java
similarity index 69%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationStreamEnvironment.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeStreamEnvironment.java
index ca9089f767a..4985f5fa6e6 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/YarnApplicationStreamEnvironment.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/ApplicationModeStreamEnvironment.java
@@ -25,11 +25,13 @@
import org.apache.flink.core.execution.JobClient;
import org.apache.flink.core.execution.PipelineExecutorServiceLoader;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.zeppelin.flink.internal.FlinkILoop;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
+import java.lang.reflect.Field;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
@@ -39,17 +41,17 @@
/**
- * StreamExecutionEnvironment used for yarn application mode.
+ * StreamExecutionEnvironment used for application mode.
* Need to add jars of scala shell before submitting jobs.
*/
-public class YarnApplicationStreamEnvironment extends StreamExecutionEnvironment {
+public class ApplicationModeStreamEnvironment extends StreamExecutionEnvironment {
- private static final Logger LOGGER = LoggerFactory.getLogger(YarnApplicationStreamEnvironment.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(ApplicationModeStreamEnvironment.class);
private FlinkILoop flinkILoop;
private FlinkScalaInterpreter flinkScalaInterpreter;
- public YarnApplicationStreamEnvironment(PipelineExecutorServiceLoader executorServiceLoader,
+ public ApplicationModeStreamEnvironment(PipelineExecutorServiceLoader executorServiceLoader,
Configuration configuration,
ClassLoader userClassloader,
FlinkILoop flinkILoop,
@@ -71,8 +73,20 @@ public JobClient executeAsync(String jobName) throws Exception {
return super.executeAsync(jobName);
}
+ @Override
+ public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
+ updateDependencies();
+ return super.execute(streamGraph);
+ }
+
+ @Override
+ public JobClient executeAsync(StreamGraph streamGraph) throws Exception {
+ updateDependencies();
+ return super.executeAsync(streamGraph);
+ }
+
private void updateDependencies() throws Exception {
- final Configuration configuration = getConfiguration();
+ final Configuration configuration = (Configuration) getFlinkConfiguration();
checkState(
configuration.getBoolean(DeploymentOptions.ATTACHED),
"Only ATTACHED mode is supported by the scala shell.");
@@ -82,6 +96,22 @@ private void updateDependencies() throws Exception {
configuration, PipelineOptions.JARS, updatedJarFiles, URL::toString);
}
+ public Object getFlinkConfiguration() {
+ if (flinkScalaInterpreter.getFlinkVersion().isAfterFlink114()) {
+ // starting from Flink 1.14, getConfiguration() return the readonly copy of internal
+ // configuration, so we need to get the internal configuration object via reflection.
+ try {
+ Field configurationField = StreamExecutionEnvironment.class.getDeclaredField("configuration");
+ configurationField.setAccessible(true);
+ return configurationField.get(this);
+ } catch (Exception e) {
+ throw new RuntimeException("Fail to get configuration from StreamExecutionEnvironment", e);
+ }
+ } else {
+ return super.getConfiguration();
+ }
+ }
+
private List<URL> getUpdatedJarFiles() throws MalformedURLException {
final URL jarUrl = flinkILoop.writeFilesToDisk().getAbsoluteFile().toURI().toURL();
final List<URL> allJarFiles = new ArrayList<>();
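
getFlinkConfiguration above falls back to reflection because, as its comment notes, Flink 1.14+ only hands out a read-only copy from getConfiguration(). The private-field access pattern it relies on, as a self-contained Scala sketch against a made-up Holder class (nothing here is Flink API):

class Holder { private val secret: String = "internal state" }   // hypothetical class for illustration

val holder = new Holder
val field = classOf[Holder].getDeclaredField("secret")
field.setAccessible(true)                                // bypass the private modifier, as the diff does for "configuration"
val value = field.get(holder).asInstanceOf[String]       // "internal state"
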
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java
similarity index 65%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java
index d10a9eacf34..f720ff255d4 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkBatchSqlInterpreter.java
@@ -17,58 +17,36 @@
package org.apache.zeppelin.flink;
-import org.apache.flink.table.api.Table;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
-import org.apache.zeppelin.interpreter.ZeppelinContext;
+import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
-import java.io.IOException;
import java.util.Properties;
public class FlinkBatchSqlInterpreter extends FlinkSqlInterpreter {
- private ZeppelinContext z;
-
public FlinkBatchSqlInterpreter(Properties properties) {
super(properties);
}
- @Override
- protected boolean isBatch() {
- return true;
- }
-
@Override
public void open() throws InterpreterException {
- this.flinkInterpreter =
- getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
- this.tbenv = flinkInterpreter.getJavaBatchTableEnvironment("blink");
- this.z = flinkInterpreter.getZeppelinContext();
super.open();
+ FlinkSqlContext flinkSqlContext = new FlinkSqlContext(
+ flinkInterpreter.getExecutionEnvironment().getJavaEnv(),
+ flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv(),
+ flinkInterpreter.getJavaBatchTableEnvironment("blink"),
+ flinkInterpreter.getJavaStreamTableEnvironment(),
+ flinkInterpreter.getZeppelinContext(),
+ null);
+ flinkInterpreter.getFlinkShims().initInnerBatchSqlInterpreter(flinkSqlContext);
}
@Override
- public void close() throws InterpreterException {
-
- }
-
- @Override
- public void callInnerSelect(String sql, InterpreterContext context) throws IOException {
- Table table = this.tbenv.sqlQuery(sql);
- String result = z.showData(table);
- context.out.write(result);
- }
-
- @Override
- public void cancel(InterpreterContext context) throws InterpreterException {
- flinkInterpreter.cancel(context);
- }
-
- @Override
- public FormType getFormType() throws InterpreterException {
- return FormType.SIMPLE;
+ public InterpreterResult runSqlList(String st, InterpreterContext context) {
+ return flinkShims.runSqlList(st, context, true);
}
@Override
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java
similarity index 89%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java
index d629ba41b1f..244d00f49c0 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkInterpreter.java
@@ -22,11 +22,8 @@
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableEnvironment;
-import org.apache.zeppelin.interpreter.Interpreter;
-import org.apache.zeppelin.interpreter.InterpreterContext;
-import org.apache.zeppelin.interpreter.InterpreterException;
-import org.apache.zeppelin.interpreter.InterpreterResult;
-import org.apache.zeppelin.interpreter.ZeppelinContext;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -57,7 +54,7 @@ public FlinkInterpreter(Properties properties) {
private String extractScalaVersion() throws InterpreterException {
String scalaVersionString = scala.util.Properties.versionString();
- LOGGER.info("Using Scala: " + scalaVersionString);
+ LOGGER.info("Using Scala: {}", scalaVersionString);
if (scalaVersionString.contains("version 2.11")) {
return "2.11";
} else if (scalaVersionString.contains("version 2.12")) {
@@ -91,11 +88,11 @@ private FlinkScalaInterpreter loadFlinkScalaInterpreter() throws Exception {
String scalaVersion = extractScalaVersion();
ClassLoader flinkScalaClassLoader = FlinkScalaInterpreter.class.getClassLoader();
String innerIntpClassName = innerInterpreterClassMap.get(scalaVersion);
- Class clazz = Class.forName(innerIntpClassName);
+ Class<?> clazz = Class.forName(innerIntpClassName);
return (FlinkScalaInterpreter)
- clazz.getConstructor(Properties.class, URLClassLoader.class)
- .newInstance(getProperties(), flinkScalaClassLoader);
+ clazz.getConstructor(Properties.class, ClassLoader.class, ZeppelinConfiguration.class)
+ .newInstance(getProperties(), flinkScalaClassLoader, zConf);
}
@Override
@@ -108,7 +105,7 @@ public void close() throws InterpreterException {
@Override
public InterpreterResult interpret(String st, InterpreterContext context)
throws InterpreterException {
- LOGGER.debug("Interpret code: " + st);
+ LOGGER.debug("Interpret code: {}", st);
this.z.setInterpreterContext(context);
this.z.setGui(context.getGui());
this.z.setNoteGui(context.getNoteGui());
@@ -159,15 +156,15 @@ StreamExecutionEnvironment getStreamExecutionEnvironment() {
}
TableEnvironment getStreamTableEnvironment() {
- return this.innerIntp.getStreamTableEnvironment("blink");
+ return this.innerIntp.getStreamTableEnvironment();
}
org.apache.flink.table.api.TableEnvironment getJavaBatchTableEnvironment(String planner) {
return this.innerIntp.getJavaBatchTableEnvironment(planner);
}
- TableEnvironment getJavaStreamTableEnvironment(String planner) {
- return this.innerIntp.getJavaStreamTableEnvironment(planner);
+ TableEnvironment getJavaStreamTableEnvironment() {
+ return this.innerIntp.getJavaStreamTableEnvironment();
}
TableEnvironment getBatchTableEnvironment() {
diff --git a/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkSqlInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkSqlInterpreter.java
new file mode 100644
index 00000000000..8c1c6290dc0
--- /dev/null
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkSqlInterpreter.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.flink;
+
+import org.apache.zeppelin.interpreter.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Properties;
+
+public abstract class FlinkSqlInterpreter extends AbstractInterpreter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(FlinkSqlInterpreter.class);
+
+ protected FlinkInterpreter flinkInterpreter;
+ protected FlinkShims flinkShims;
+ protected ZeppelinContext z;
+
+
+ public FlinkSqlInterpreter(Properties properties) {
+ super(properties);
+ }
+
+ @Override
+ public void open() throws InterpreterException {
+ this.flinkInterpreter =
+ getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
+ this.flinkShims = flinkInterpreter.getFlinkShims();
+ }
+
+ @Override
+ protected InterpreterResult internalInterpret(String st, InterpreterContext context) throws InterpreterException {
+ LOGGER.debug("Interpret code: {}", st);
+ // set ClassLoader of current Thread to be the ClassLoader of Flink scala-shell,
+ // otherwise codegen will fail to find classes defined in scala-shell
+ ClassLoader originClassLoader = Thread.currentThread().getContextClassLoader();
+ try {
+ Thread.currentThread().setContextClassLoader(flinkInterpreter.getFlinkScalaShellLoader());
+ flinkInterpreter.createPlannerAgain();
+ flinkInterpreter.setParallelismIfNecessary(context);
+ flinkInterpreter.setSavepointPathIfNecessary(context);
+ return runSqlList(st, context);
+ } finally {
+ Thread.currentThread().setContextClassLoader(originClassLoader);
+ }
+ }
+
+ @Override
+ public ZeppelinContext getZeppelinContext() {
+ if (flinkInterpreter != null) {
+ return flinkInterpreter.getZeppelinContext();
+ } else {
+ return null;
+ }
+ }
+
+ public abstract InterpreterResult runSqlList(String st, InterpreterContext context);
+
+ @Override
+ public void cancel(InterpreterContext context) throws InterpreterException {
+ flinkInterpreter.cancel(context);
+ }
+
+ @Override
+ public FormType getFormType() throws InterpreterException {
+ return FormType.SIMPLE;
+ }
+
+ @Override
+ public void close() throws InterpreterException {
+ }
+}
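
FlinkSqlInterpreter.internalInterpret above installs the Flink scala-shell classloader on the current thread before translating SQL, so code generation can resolve classes defined in the shell, and restores the original loader afterwards; the concrete work is deferred to runSqlList, which the batch and stream subclasses in this diff implement by delegating to flinkShims.runSqlList. The save/swap/restore part of that, as a self-contained Scala sketch (shellLoader is a stand-in for flinkInterpreter.getFlinkScalaShellLoader()):

def withContextClassLoader[T](loader: ClassLoader)(body: => T): T = {
  val origin = Thread.currentThread().getContextClassLoader
  Thread.currentThread().setContextClassLoader(loader)
  try body
  finally Thread.currentThread().setContextClassLoader(origin)    // restore even if body throws
}

val shellLoader: ClassLoader = Thread.currentThread().getContextClassLoader   // stand-in for the scala-shell loader
withContextClassLoader(shellLoader) {
  // this is the point where runSqlList(st, context) runs in the real interpreter
  "ok"
}
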
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java
similarity index 69%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java
index 23aadf24180..087fa3a208e 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/FlinkStreamSqlInterpreter.java
@@ -21,9 +21,9 @@
import org.apache.zeppelin.flink.sql.AppendStreamSqlJob;
import org.apache.zeppelin.flink.sql.SingleRowStreamSqlJob;
import org.apache.zeppelin.flink.sql.UpdateStreamSqlJob;
-import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
+import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
@@ -36,36 +36,36 @@ public FlinkStreamSqlInterpreter(Properties properties) {
super(properties);
}
- @Override
- protected boolean isBatch() {
- return false;
- }
-
@Override
public void open() throws InterpreterException {
- this.flinkInterpreter =
- getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
- this.tbenv = flinkInterpreter.getJavaStreamTableEnvironment("blink");
super.open();
- }
-
- @Override
- public void close() throws InterpreterException {
+ FlinkSqlContext flinkSqlContext = new FlinkSqlContext(
+ flinkInterpreter.getExecutionEnvironment().getJavaEnv(),
+ flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv(),
+ flinkInterpreter.getJavaBatchTableEnvironment("blink"),
+ flinkInterpreter.getJavaStreamTableEnvironment(),
+ flinkInterpreter.getZeppelinContext(),
+ sql -> callInnerSelect(sql));
+ flinkInterpreter.getFlinkShims().initInnerStreamSqlInterpreter(flinkSqlContext);
}
- @Override
- public void callInnerSelect(String sql, InterpreterContext context) throws IOException {
+ public void callInnerSelect(String sql) {
+ InterpreterContext context = InterpreterContext.get();
String streamType = context.getLocalProperties().getOrDefault("type", "update");
if (streamType.equalsIgnoreCase("single")) {
SingleRowStreamSqlJob streamJob = new SingleRowStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
- tbenv,
+ flinkInterpreter.getJavaStreamTableEnvironment(),
flinkInterpreter.getJobManager(),
context,
flinkInterpreter.getDefaultParallelism(),
flinkInterpreter.getFlinkShims());
- streamJob.run(sql);
+ try {
+ streamJob.run(sql);
+ } catch (IOException e) {
+ throw new RuntimeException("Fail to run single type stream job", e);
+ }
} else if (streamType.equalsIgnoreCase("append")) {
AppendStreamSqlJob streamJob = new AppendStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
@@ -74,7 +74,11 @@ public void callInnerSelect(String sql, InterpreterContext context) throws IOExc
context,
flinkInterpreter.getDefaultParallelism(),
flinkInterpreter.getFlinkShims());
- streamJob.run(sql);
+ try {
+ streamJob.run(sql);
+ } catch (IOException e) {
+ throw new RuntimeException("Fail to run append type stream job", e);
+ }
} else if (streamType.equalsIgnoreCase("update")) {
UpdateStreamSqlJob streamJob = new UpdateStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
@@ -83,24 +87,19 @@ public void callInnerSelect(String sql, InterpreterContext context) throws IOExc
context,
flinkInterpreter.getDefaultParallelism(),
flinkInterpreter.getFlinkShims());
- streamJob.run(sql);
+ try {
+ streamJob.run(sql);
+ } catch (IOException e) {
+ throw new RuntimeException("Fail to run update type stream job", e);
+ }
} else {
- throw new IOException("Unrecognized stream type: " + streamType);
+ throw new RuntimeException("Unrecognized stream type: " + streamType);
}
}
@Override
- public void callInsertInto(String sql, InterpreterContext context) throws IOException {
- super.callInsertInto(sql, context);
- }
-
- public void cancel(InterpreterContext context) throws InterpreterException {
- this.flinkInterpreter.cancel(context);
- }
-
- @Override
- public Interpreter.FormType getFormType() throws InterpreterException {
- return Interpreter.FormType.SIMPLE;
+ public InterpreterResult runSqlList(String st, InterpreterContext context) {
+ return flinkShims.runSqlList(st, context, false);
}
@Override
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java
similarity index 96%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java
index a8ef900a776..2dbd7d5f516 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/HadoopUtils.java
@@ -79,11 +79,10 @@ private static ApplicationReport getYarnApplicationReport(ApplicationId yarnAppI
return yarnClient.getApplicationReport(yarnAppId);
}
- public static void cleanupStagingDirInternal(ClusterClient clusterClient) {
+ public static void cleanupStagingDirInternal(String yarnAppId) {
try {
- ApplicationId appId = (ApplicationId) clusterClient.getClusterId();
FileSystem fs = FileSystem.get(new Configuration());
- Path stagingDirPath = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString());
+ Path stagingDirPath = new Path(fs.getHomeDirectory(), ".flink/" + yarnAppId);
if (fs.delete(stagingDirPath, true)) {
LOGGER.info("Deleted staging directory " + stagingDirPath);
}
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java
similarity index 94%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java
index 763795f1125..1bc61821f85 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/IPyFlinkInterpreter.java
@@ -61,6 +61,10 @@ public synchronized void open() throws InterpreterException {
opened = true;
}
+ public boolean isAfterFlink114() {
+ return flinkInterpreter.getFlinkVersion().isAfterFlink114();
+ }
+
@Override
public ZeppelinContext buildZeppelinContext() {
return flinkInterpreter.getZeppelinContext();
@@ -97,7 +101,7 @@ public InterpreterResult internalInterpret(String st,
InterpreterResult result =
super.internalInterpret("intp.resetClassLoaderInPythonThread()", context);
if (result.code() != InterpreterResult.Code.SUCCESS) {
- LOGGER.warn("Fail to resetClassLoaderInPythonThread: " + result.toString());
+ LOGGER.warn("Fail to resetClassLoaderInPythonThread: {}", result);
}
}
}
@@ -108,7 +112,7 @@ public void cancel(InterpreterContext context) throws InterpreterException {
flinkInterpreter.cancel(context);
super.cancel(context);
}
-
+
/**
* Called by python process.
*/
@@ -133,10 +137,6 @@ public int getProgress(InterpreterContext context) throws InterpreterException {
return flinkInterpreter.getProgress(context);
}
- public boolean isFlink110() {
- return flinkInterpreter.getFlinkVersion().isFlink110();
- }
-
public org.apache.flink.api.java.ExecutionEnvironment getJavaExecutionEnvironment() {
return flinkInterpreter.getExecutionEnvironment().getJavaEnv();
}
@@ -150,7 +150,7 @@ public TableEnvironment getJavaBatchTableEnvironment(String planner) {
return flinkInterpreter.getJavaBatchTableEnvironment(planner);
}
- public TableEnvironment getJavaStreamTableEnvironment(String planner) {
- return flinkInterpreter.getJavaStreamTableEnvironment(planner);
+ public TableEnvironment getJavaStreamTableEnvironment() {
+ return flinkInterpreter.getJavaStreamTableEnvironment();
}
}
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/JobManager.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/JobManager.java
similarity index 90%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/JobManager.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/JobManager.java
index d60ff1a3eca..61fdf4db80e 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/JobManager.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/JobManager.java
@@ -38,7 +38,7 @@
public class JobManager {
- private static Logger LOGGER = LoggerFactory.getLogger(JobManager.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(JobManager.class);
public static final String LATEST_CHECKPOINT_PATH = "latest_checkpoint_path";
public static final String SAVEPOINT_PATH = "savepoint_path";
public static final String RESUME_FROM_SAVEPOINT = "resumeFromSavepoint";
@@ -66,27 +66,38 @@ public JobManager(String flinkWebUrl,
public void addJob(InterpreterContext context, JobClient jobClient) {
String paragraphId = context.getParagraphId();
JobClient previousJobClient = this.jobs.put(paragraphId, jobClient);
+ if (previousJobClient != null) {
+ LOGGER.warn("There's another Job {} that is associated with paragraph {}",
+ jobClient.getJobID(), paragraphId);
+ return;
+ }
long checkInterval = Long.parseLong(properties.getProperty("zeppelin.flink.job.check_interval", "1000"));
+ if (checkInterval < 0) {
+ LOGGER.warn("The value of checkInterval must be positive {}", checkInterval);
+ return;
+ }
FlinkJobProgressPoller thread = new FlinkJobProgressPoller(flinkWebUrl, jobClient.getJobID(), context, checkInterval);
thread.setName("JobProgressPoller-Thread-" + paragraphId);
thread.start();
this.jobProgressPollerMap.put(jobClient.getJobID(), thread);
- if (previousJobClient != null) {
- LOGGER.warn("There's another Job {} that is associated with paragraph {}",
- jobClient.getJobID(), paragraphId);
- }
}
public void removeJob(String paragraphId) {
- LOGGER.info("Remove job in paragraph: " + paragraphId);
+ LOGGER.info("Remove job in paragraph: {}", paragraphId);
JobClient jobClient = this.jobs.remove(paragraphId);
if (jobClient == null) {
- LOGGER.warn("Unable to remove job, because no job is associated with paragraph: "
- + paragraphId);
+ LOGGER.warn("Unable to remove job, because no job is associated with paragraph: {}",
+ paragraphId);
return;
}
FlinkJobProgressPoller jobProgressPoller =
this.jobProgressPollerMap.remove(jobClient.getJobID());
+ if (jobProgressPoller == null) {
+ LOGGER.warn("Unable to remove poller, because no poller is associated with paragraph: {}",
+ paragraphId);
+ return;
+ }
+
jobProgressPoller.cancel();
jobProgressPoller.interrupt();
}
@@ -103,21 +114,21 @@ public void sendFlinkJobUrl(InterpreterContext context) {
infos.put("paraId", context.getParagraphId());
context.getIntpEventClient().onParaInfosReceived(infos);
} else {
- LOGGER.warn("No job is associated with paragraph: " + context.getParagraphId());
+ LOGGER.warn("No job is associated with paragraph: {}", context.getParagraphId());
}
}
public int getJobProgress(String paragraphId) {
JobClient jobClient = this.jobs.get(paragraphId);
if (jobClient == null) {
- LOGGER.warn("Unable to get job progress for paragraph: " + paragraphId +
- ", because no job is associated with this paragraph");
+ LOGGER.warn("Unable to get job progress for paragraph: {}"
+ + ", because no job is associated with this paragraph", paragraphId);
return 0;
}
FlinkJobProgressPoller jobProgressPoller = this.jobProgressPollerMap.get(jobClient.getJobID());
if (jobProgressPoller == null) {
- LOGGER.warn("Unable to get job progress for paragraph: " + paragraphId +
- ", because no job progress is associated with this jobId: " + jobClient.getJobID());
+ LOGGER.warn("Unable to get job progress for paragraph: {}"
+ + ", because no job progress is associated with this jobId: {}", paragraphId, jobClient.getJobID());
return 0;
}
return jobProgressPoller.getProgress();
@@ -163,8 +174,8 @@ public void cancelJob(InterpreterContext context) throws InterpreterException {
throw new InterpreterException(errorMessage, e);
} finally {
if (cancelled) {
- LOGGER.info("Cancelling is successful, remove the associated FlinkJobProgressPoller of paragraph: "
- + context.getParagraphId());
+ LOGGER.info("Cancelling is successful, remove the associated FlinkJobProgressPoller of paragraph: {}",
+ context.getParagraphId());
FlinkJobProgressPoller jobProgressPoller = jobProgressPollerMap.remove(jobClient.getJobID());
if (jobProgressPoller != null) {
jobProgressPoller.cancel();
@@ -220,11 +231,11 @@ public void run() {
totalTasks += vertex.getInt("parallelism");
finishedTasks += vertex.getJSONObject("tasks").getInt("FINISHED");
}
- LOGGER.debug("Total tasks:" + totalTasks);
- LOGGER.debug("Finished tasks:" + finishedTasks);
+ LOGGER.debug("Total tasks:{}", totalTasks);
+ LOGGER.debug("Finished tasks:{}", finishedTasks);
if (finishedTasks != 0) {
this.progress = finishedTasks * 100 / totalTasks;
- LOGGER.debug("Progress: " + this.progress);
+ LOGGER.debug("Progress: {}", this.progress);
}
String jobState = rootNode.getObject().getString("state");
if (jobState.equalsIgnoreCase("finished")) {
@@ -261,6 +272,7 @@ public void run() {
if (!StringUtils.isBlank(checkpointPath) && !checkpointPath.equals(latestCheckpointPath)) {
Map<String, String> config = new HashMap<>();
config.put(LATEST_CHECKPOINT_PATH, checkpointPath);
+ LOGGER.info("Update latest checkpoint path: {}", checkpointPath);
context.getIntpEventClient().updateParagraphConfig(
context.getNoteId(), context.getParagraphId(), config);
latestCheckpointPath = checkpointPath;
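
The addJob changes above start one FlinkJobProgressPoller per paragraph and bail out early when the polling interval is misconfigured. A small sketch of how the zeppelin.flink.job.check_interval property is read and validated (property name and the 1000 ms default are taken from the diff; the poller itself is assumed):

val properties = new java.util.Properties()
properties.setProperty("zeppelin.flink.job.check_interval", "2000")   // poll every 2 seconds

val checkInterval =
  java.lang.Long.parseLong(properties.getProperty("zeppelin.flink.job.check_interval", "1000"))

if (checkInterval < 0) {
  // mirrors the early return in addJob(): no progress poller is started
  println("The value of checkInterval must be positive: " + checkInterval)
} else {
  // here addJob() would create and start a poller thread named "JobProgressPoller-Thread-" + paragraphId
}
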
diff --git a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java
similarity index 95%
rename from flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java
rename to flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java
index d27f0faef64..df203b71b84 100644
--- a/flink/flink-scala-parent/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/PyFlinkInterpreter.java
@@ -123,7 +123,7 @@ public InterpreterResult interpret(String st, InterpreterContext context) throws
if (useIPython() || (!useIPython() && getPythonProcessLauncher().isRunning())) {
InterpreterResult result = super.interpret("intp.resetClassLoaderInPythonThread()", context);
if (result.code() != InterpreterResult.Code.SUCCESS) {
- LOGGER.warn("Fail to resetClassLoaderInPythonThread: " + result.toString());
+ LOGGER.warn("Fail to resetClassLoaderInPythonThread: {}", result);
}
}
}
@@ -182,8 +182,8 @@ public int getProgress(InterpreterContext context) throws InterpreterException {
return flinkInterpreter.getProgress(context);
}
- public boolean isFlink110() {
- return flinkInterpreter.getFlinkVersion().isFlink110();
+ public boolean isAfterFlink114() {
+ return flinkInterpreter.getFlinkVersion().isAfterFlink114();
}
public org.apache.flink.api.java.ExecutionEnvironment getJavaExecutionEnvironment() {
@@ -199,7 +199,7 @@ public TableEnvironment getJavaBatchTableEnvironment(String planner) {
return flinkInterpreter.getJavaBatchTableEnvironment(planner);
}
- public TableEnvironment getJavaStreamTableEnvironment(String planner) {
- return flinkInterpreter.getJavaStreamTableEnvironment(planner);
+ public TableEnvironment getJavaStreamTableEnvironment() {
+ return flinkInterpreter.getJavaStreamTableEnvironment();
}
}
diff --git a/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/TableEnvFactory.java b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/TableEnvFactory.java
new file mode 100644
index 00000000000..0328ca3936c
--- /dev/null
+++ b/flink/flink-scala-2.12/src/main/java/org/apache/zeppelin/flink/TableEnvFactory.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.flink;
+
+import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.api.EnvironmentSettings;
+import org.apache.flink.table.api.TableConfig;
+import org.apache.flink.table.api.TableEnvironment;
+import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.catalog.CatalogManager;
+import org.apache.flink.table.catalog.FunctionCatalog;
+import org.apache.flink.table.delegation.Executor;
+import org.apache.flink.table.delegation.Planner;
+import org.apache.flink.table.module.ModuleManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.Constructor;
+import java.net.URL;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Factory class for creating flink table env for different purpose:
+ * 1. java/scala
+ * 2. stream table / batch table
+ * 3. flink planner / blink planner
+ *
+ */
+public class TableEnvFactory {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(TableEnvFactory.class);
+
+ private FlinkVersion flinkVersion;
+ private FlinkShims flinkShims;
+ private org.apache.flink.api.scala.ExecutionEnvironment benv;
+ private org.apache.flink.streaming.api.scala.StreamExecutionEnvironment senv;
+
+ private List<URL> userJars;
+
+ /***********************************************************************
+ Should use different TableConfig for different kinds of table_env
+ otherwise it will cause conflicts after flink 1.13
+ ***********************************************************************/
+ // tableConfig used for StreamTableEnvironment.
+ private TableConfig streamTableConfig;
+ // tableConfig used for BatchTableEnvironment.
+ private TableConfig batchTableConfig;
+ // tableConfig for old planner
+ private TableConfig oldPlannerStreamTableConfig;
+ private TableConfig oldPlannerBatchTableConfig;
+
+ private CatalogManager catalogManager;
+ private CatalogManager oldPlannerCatalogManager;
+ private ModuleManager moduleManager;
+ private FunctionCatalog functionCatalog;
+
+
+ public TableEnvFactory(FlinkVersion flinkVersion,
+ FlinkShims flinkShims,
+ org.apache.flink.api.scala.ExecutionEnvironment env,
+ org.apache.flink.streaming.api.scala.StreamExecutionEnvironment senv,
+ TableConfig streamTableConfig,
+ List<URL> userJars) {
+
+ this.flinkVersion = flinkVersion;
+ this.flinkShims = flinkShims;
+ this.benv = env;
+ this.senv = senv;
+ this.streamTableConfig = streamTableConfig;
+ this.batchTableConfig = new TableConfig();
+ this.batchTableConfig.getConfiguration().addAll(streamTableConfig.getConfiguration());
+ flinkShims.setBatchRuntimeMode(this.batchTableConfig);
+ this.oldPlannerBatchTableConfig = new TableConfig();
+ this.oldPlannerBatchTableConfig.getConfiguration().addAll(streamTableConfig.getConfiguration());
+ flinkShims.setOldPlanner(this.oldPlannerBatchTableConfig);
+ this.oldPlannerStreamTableConfig = new TableConfig();
+ this.oldPlannerStreamTableConfig.getConfiguration().addAll(streamTableConfig.getConfiguration());
+ flinkShims.setOldPlanner(this.oldPlannerStreamTableConfig);
+
+ this.catalogManager = (CatalogManager) flinkShims.createCatalogManager(streamTableConfig.getConfiguration());
+ this.oldPlannerCatalogManager = (CatalogManager) flinkShims.createCatalogManager(
+ this.oldPlannerStreamTableConfig.getConfiguration());
+ this.moduleManager = new ModuleManager();
+ this.functionCatalog = (FunctionCatalog) flinkShims.createFunctionCatalog(streamTableConfig,
+ catalogManager,
+ moduleManager,
+ userJars);
+ this.userJars = userJars;
+ }
+
+ public TableEnvironment createScalaFlinkBatchTableEnvironment() {
+ try {
+ Class<?> clazz = Class
+ .forName("org.apache.flink.table.api.bridge.scala.internal.BatchTableEnvironmentImpl");
+
+ Constructor<?> constructor = clazz
+ .getConstructor(
+ org.apache.flink.api.scala.ExecutionEnvironment.class,
+ TableConfig.class,
+ CatalogManager.class,
+ ModuleManager.class);
+
+ return (TableEnvironment)
+ constructor.newInstance(benv, oldPlannerBatchTableConfig, oldPlannerCatalogManager, moduleManager);
+ } catch (Exception e) {
+ throw new TableException("Fail to createScalaFlinkBatchTableEnvironment", e);
+ }
+ }
+
+ public TableEnvironment createJavaFlinkBatchTableEnvironment() {
+ try {
+ Class<?> clazz = Class
+ .forName("org.apache.flink.table.api.bridge.java.internal.BatchTableEnvironmentImpl");
+
+ Constructor<?> con = clazz.getConstructor(
+ ExecutionEnvironment.class,
+ TableConfig.class,
+ CatalogManager.class,
+ ModuleManager.class);
+
+ return (TableEnvironment) con.newInstance(
+ benv.getJavaEnv(),
+ oldPlannerBatchTableConfig,
+ oldPlannerCatalogManager,
+ moduleManager);
+ } catch (Throwable t) {
+ throw new TableException("Create BatchTableEnvironment failed.", t);
+ }
+ }
+
+ public TableEnvironment createScalaBlinkStreamTableEnvironment(EnvironmentSettings settings, ClassLoader classLoader) {
+ return (TableEnvironment) flinkShims.createScalaBlinkStreamTableEnvironment(settings,
+ senv.getJavaEnv(), streamTableConfig, moduleManager, functionCatalog, catalogManager, userJars, classLoader);
+ }
+
+ public TableEnvironment createJavaBlinkStreamTableEnvironment(EnvironmentSettings settings, ClassLoader classLoader) {
+ return (TableEnvironment) flinkShims.createJavaBlinkStreamTableEnvironment(settings,
+ senv.getJavaEnv(), streamTableConfig, moduleManager, functionCatalog, catalogManager, userJars, classLoader);
+ }
+
+ public TableEnvironment createJavaBlinkBatchTableEnvironment(
+ EnvironmentSettings settings, ClassLoader classLoader) {
+ return (TableEnvironment) flinkShims.createJavaBlinkStreamTableEnvironment(settings,
+ senv.getJavaEnv(), batchTableConfig, moduleManager, functionCatalog, catalogManager, userJars, classLoader);
+ }
+
+ public void createStreamPlanner(EnvironmentSettings settings) {
+ ImmutablePair