diff --git a/.appveyor.yml b/.appveyor.yml index e14cc62a08e..52395be924a 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -17,17 +17,33 @@ # under the License. # -version: '1.0.0-dev.{build}' +version: '0.12.0.{build}' shallow_clone: true -build: off +branches: + except: + - /dependabot/ + +platform: + - x64 + +build: Script os: - - Visual Studio 2015 + - Visual Studio 2022 + +environment: + APPVEYOR_SAVE_CACHE_ON_ERROR: True -install: - - echo "Install" +cache: + - '%USERPROFILE%/.m2' build_script: - - echo "Build" + - cmd: set JAVA_HOME=C:\Program Files\Java\jdk1.8.0 + - cmd: >- + ./mvnw.cmd clean package -DskipTests ^ + -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade.mojo.ShadeMojo=warn ^ + -Dorg.slf4j.simpleLogger.log.com.googlecode.download.maven.plugin.internal.WGet=warn ^ + -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency.fromDependencies.CopyDependenciesMojo=warn ^ + --no-transfer-progress diff --git a/.asf.yaml b/.asf.yaml index 99a643f17a8..44528f62823 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -# https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories +# https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features github: description: "Web-based notebook that enables data-driven, interactive data analytics and collaborative documents with SQL, Scala and more." @@ -30,3 +30,20 @@ github: - big-data - zeppelin - javascript + enabled_merge_buttons: + merge: false + squash: true + rebase: false + protected_branches: + master: + required_pull_request_reviews: + dismiss_stale_reviews: true + required_approving_review_count: 1 + autolink_jira: + - ZEPPELIN + +notifications: + commits: commits@zeppelin.apache.org + issues: reviews@zeppelin.apache.org + pullrequests: reviews@zeppelin.apache.org + jira_options: link label diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..cbdcbbc258e --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.js text eol=lf diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index addaf21cbb3..25167143d03 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -4,7 +4,13 @@ First time? Check out the contributing guide - https://zeppelin.apache.org/contr ### What type of PR is it? -[Bug Fix | Improvement | Feature | Documentation | Hot Fix | Refactoring] +Bug Fix +Improvement +Feature +Documentation +Hot Fix +Refactoring +*Please leave your type of PR only* ### Todos * [ ] - Task @@ -20,6 +26,6 @@ First time? Check out the contributing guide - https://zeppelin.apache.org/contr ### Screenshots (if appropriate) ### Questions: -* Does the licenses files need update? +* Does the license files need to update? * Is there breaking changes for older versions? * Does this needs documentation? 
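The workflow diffs that follow (core.yml, frontend.yml, quick.yml) all move the shared Maven settings into workflow-level environment variables: MAVEN_OPTS carries the JVM options for the Maven process, and MAVEN_ARGS carries the CLI flags (-B --no-transfer-progress) that each run step splices into its ./mvnw invocation through ordinary shell expansion. A minimal illustrative sketch of that pattern is shown here; the env values, action versions, and the "build without any profiles" step come from the hunks below, while the job name is a placeholder and the sketch is not itself part of the patch.

env:
  # JVM options picked up by the mvnw launcher for the Maven JVM
  MAVEN_OPTS: >-
    -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit
    -Dhttp.keepAlive=false
    -Dmaven.wagon.http.pool=false
    -Dmaven.wagon.http.retryHandler.count=3
  # CLI flags appended to every Maven invocation in the run steps
  MAVEN_ARGS: >-
    -B --no-transfer-progress

jobs:
  build:   # placeholder job name
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
      - name: build without any profiles
        # ${MAVEN_ARGS} is expanded by the step's bash shell, not by Maven itself
        run: ./mvnw clean verify -DskipTests ${MAVEN_ARGS}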
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index fdf28ff0e64..b7004b15daa 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -1,20 +1,24 @@ name: core + on: push: + branches-ignore: + - 'dependabot/**' pull_request: branches: - master - - branch-* - types: [opened, synchronize] + - 'branch-*' env: # Disable keepAlive and pool # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080 MAVEN_OPTS: >- - -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.count=3 + MAVEN_ARGS: >- + -B --no-transfer-progress ZEPPELIN_HELIUM_REGISTRY: helium SPARK_PRINT_LAUNCH_COMMAND: "true" SPARK_LOCAL_IP: 127.0.0.1 @@ -25,383 +29,470 @@ defaults: run: shell: bash -l {0} +permissions: + contents: read # to fetch code (actions/checkout) + jobs: - test-core-modules: + # test on core-modules (zeppelin-interpreter,zeppelin-zengine,zeppelin-server), + # some interpreters are included, because zeppelin-server test depends on them: spark, shell & markdown + core-modules: runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: - hadoop: [hadoop2, hadoop3] + hadoop: [hadoop3] + java: [ 8, 11 ] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache + ~/conda_pkgs_dir key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: install application with some interpreter + run: ./mvnw install -Pbuild-distr -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} ${MAVEN_ARGS} + - name: install and test plugins + run: ./mvnw package -pl zeppelin-plugins -amd ${MAVEN_ARGS} + - name: Setup conda environment with python 3.9 and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + environment-file: testing/env_python_3.9_with_R.yml + python-version: 3.9 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - channel-priority: strict + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" conda list conda info - - name: install application with some interpreter - run: mvn install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B - - name: install and test plugins - run: mvn package -DskipRat -pl zeppelin-plugins -amd -B - - name: run tests with ${{ matrix.hadoop }} - run: mvn 
verify -Pusing-packaged-distr -DskipRat -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false - test-interpreter-modules: + - name: run tests with ${{ matrix.hadoop }} # skip spark test because we would run them in other CI + run: ./mvnw verify -Pusing-packaged-distr -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false + + # test interpreter modules except spark, flink, python, rlang, jupyter + interpreter-test-non-core: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + java: [ 8, 11 ] env: - INTERPRETERS: 'beam,hbase,pig,jdbc,file,flink,flink-cmd,ignite,kylin,lens,cassandra,elasticsearch,bigquery,alluxio,scio,livy,groovy,sap,java,geode,neo4j,hazelcastjet,submarine,sparql,mongodb' + INTERPRETERS: 'hbase,jdbc,file,flink-cmd,cassandra,elasticsearch,bigquery,alluxio,livy,groovy,java,neo4j,sparql,mongodb,influxdb,shell' steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: install environment + run: ./mvnw install -DskipTests -am -pl ${INTERPRETERS} ${MAVEN_ARGS} + - name: Setup conda environment with python 3.9 and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R_and_tensorflow environment-file: testing/env_python_3_with_R_and_tensorflow.yml - python-version: 3.7 + python-version: 3.9 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" + use-mamba: true - name: verify interpreter - run: mvn verify -DskipRat -am -pl .,zeppelin-interpreter,zeppelin-interpreter-shaded,${INTERPRETERS} -Pscala-2.10 -B - test-zeppelin-client-integration-test: + run: ./mvnw verify -am -pl ${INTERPRETERS} ${MAVEN_ARGS} + + # test interpreter modules for jupyter, python, rlang + interpreter-test-jupyter-python-rlang: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python: [ 3.9 ] + java: [ 8, 11 ] + include: + - python: 3.7 + java: 8 + - python: 3.8 + java: 8 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository 
!~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: Setup conda environment with python ${{ matrix.python }} and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + environment-file: testing/env_python_${{ matrix.python }}_with_R.yml + python-version: ${{ matrix.python }} + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - name: install environment run: | - mvn install -DskipTests -DskipRat -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am - mvn package -DskipRat -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: mvn test -DskipRat -pl zeppelin-interpreter-integration -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest - test-flink-and-flink-integration-test: + ./mvnw install -DskipTests -pl python,rlang,zeppelin-jupyter-interpreter -am -Phadoop3 ${MAVEN_ARGS} + - name: run tests with ${{ matrix.python }} + run: | + ./mvnw test -pl python,rlang,zeppelin-jupyter-interpreter -DfailIfNoTests=false ${MAVEN_ARGS} + + # zeppelin integration test except Spark & Flink + zeppelin-integration-test: runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: - flink: [110, 111, 112, 113] + java: [ 8, 11 ] steps: + # user/password => root/root + - name: Start mysql + run: sudo systemctl start mysql.service - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: python_3_with_flink - environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml - python-version: 3.7 - auto-activate-base: false - name: install environment run: | - mvn install -DskipTests -DskipRat -am -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -B - mvn clean package -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: mvn test -DskipRat -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -DfailIfNoTests=false -B -Dtest=org.apache.zeppelin.flink.*,FlinkIntegrationTest${{ matrix.flink }},ZeppelinFlinkClusterTest${{ matrix.flink }} - run-spark-intergration-test: - runs-on: ubuntu-20.04 - steps: - - name: 
Checkout - uses: actions/checkout@v2 - - name: Tune Runner VM - uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - distribution: 'adopt' - java-version: 8 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/zeppelin/ - key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + ./mvnw install -DskipTests -Phadoop3 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,flink-cmd,flink/flink-scala-2.12,jdbc,shell -am -Pweb-classic -Pflink-117 ${MAVEN_ARGS} + ./mvnw package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS} + - name: Setup conda environment with python 3.9 and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + python-version: 3.9 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: | - mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B - mvn clean package -pl zeppelin-plugins -amd -DskipTests -B - name: run tests - run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,SparkIntegrationTest30 -DfailIfNoTests=false - jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11: + run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest,ShellIntegrationTest,JdbcIntegrationTest + - name: Print zeppelin logs + if: always() + run: if [ -d "logs" ]; then cat logs/*; fi + + flink-test-and-flink-integration-test: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python: [ 3.9 ] + flink: [116, 117] + include: + # Flink 1.15 supports Python 3.6, 3.7, and 3.8 + # https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/python/installation/ + - python: 3.8 + flink: 115 steps: - # user/password => root/root - - name: Start mysql - run: sudo systemctl start mysql.service - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v4 with: - distribution: 'adopt' + distribution: 'temurin' java-version: 8 - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: install environment for 
flink + run: | + ./mvnw install -DskipTests -am -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Phadoop3 -Pintegration ${MAVEN_ARGS} + ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS} + - name: Setup conda environment with python ${{ matrix.python }} and R + uses: conda-incubator/setup-miniconda@v3 with: - activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + activate-environment: python_3_with_flink + environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml + python-version: ${{ matrix.python }} + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" - - name: install environment - run: | - mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B - mvn clean package -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: mvn test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false + use-mamba: true + - name: run tests for flink + run: ./mvnw verify -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -am -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=org.apache.zeppelin.flink.*Test,FlinkIntegrationTest${{ matrix.flink }} ${MAVEN_ARGS} + - name: Print zeppelin logs + if: always() + run: if [ -d "logs" ]; then cat logs/*; fi - spark-2-4-and-scale-2-12: + + spark-integration-test: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + hadoop: [ 3 ] + java: [ 8, 11 ] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: install environment + run: | + ./mvnw install -DskipTests -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown -am -Pweb-classic -Phadoop3 -Pintegration ${MAVEN_ARGS} + ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS} + - name: Setup conda environment with python 3.9 and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + python-version: 3.9 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: 
install environment - run: | - mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B - - name: run tests - run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false + - name: run tests on hadoop${{ matrix.hadoop }} + run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop${{ matrix.hadoop }} -Pintegration -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest32,SparkIntegrationTest32,ZeppelinSparkClusterTest33,SparkIntegrationTest33 -DfailIfNoTests=false ${MAVEN_ARGS} - spark-2-3-and-scale-2-11-and-other-interpreter: + # test on spark for each spark version & scala version + spark-test: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python: [ 3.9 ] + java: [ 8, 11 ] + include: + - python: 3.7 + java: 8 + - python: 3.8 + java: 8 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: install environment + run: ./mvnw install -DskipTests -pl spark-submit,spark/scala-2.12,spark/scala-2.13 -am -Phadoop3 ${MAVEN_ARGS} + - name: Setup conda environment with python ${{ matrix.python }} and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + environment-file: testing/env_python_${{ matrix.python }}_with_R.yml + python-version: ${{ matrix.python }} + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment + - name: run spark-3.3 tests with scala-2.12 and python-${{ matrix.python }} run: | - mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B - - name: run tests - run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + rm -rf spark/interpreter/metastore_db + ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.12 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS} + - name: run spark-3.3 tests with scala-2.13 and python-${{ matrix.python }} + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS} + - name: run spark-3.4 tests with scala-2.13 and python-${{ matrix.python }} 
+ run: | + rm -rf spark/interpreter/metastore_db + ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.4 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS} + - name: run spark-3.5 tests with scala-2.13 and python-${{ matrix.python }} + if: matrix.python >= '3.8' + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.5 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS} - spark-2-2-and-scale-2-10-and-other-interpreter: + # The version combination is based on the facts: + # 1. official Livy 0.8 binary tarball is built against Spark 2.4 + # 2. official Spark 2.4 binary tarball is built against Scala 2.11 + # 3. Spark 2.4 support Python 2.7, 3.4 to 3.7 + livy-0-8-with-spark-2-4-under-python37: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Set up JDK 8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v4 with: - distribution: 'adopt' + distribution: 'temurin' java-version: 8 - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- + - name: install environment + run: | + ./mvnw install -DskipTests -pl livy -am ${MAVEN_ARGS} + ./testing/downloadSpark.sh "2.4.8" "2.7" + ./testing/downloadLivy.sh "0.8.0-incubating" "2.11" - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: - activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml + activate-environment: python_37_with_R + environment-file: testing/env_python_3.7_with_R.yml python-version: 3.7 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B - name: run tests - run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false - test-livy-0-5-with-spark-2-2-0-under-python3: + run: | + export SPARK_HOME=$PWD/spark-2.4.8-bin-hadoop2.7 + export LIVY_HOME=$PWD/apache-livy-0.8.0-incubating_2.11-bin + ./mvnw verify -pl livy -am ${MAVEN_ARGS} + + default-build: runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + java: [ 8, 11 ] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: ${{ matrix.java }} - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache 
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 - auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" - - name: install environment - run: | - mvn install -DskipTests -DskipRat -pl livy -am -B - ./testing/downloadSpark.sh "2.2.0" "2.6" - ./testing/downloadLivy.sh "0.5.0-incubating" - - name: run tests - run: mvn verify -DskipRat -pl livy -am -B + - name: build without any profiles + run: ./mvnw clean verify -DskipTests ${MAVEN_ARGS} diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml index 9fb1f8b539c..3ebc68594df 100644 --- a/.github/workflows/frontend.yml +++ b/.github/workflows/frontend.yml @@ -1,114 +1,137 @@ name: frontend + on: push: + branches-ignore: + - 'dependabot/**' pull_request: branches: - master - - branch-* - types: [opened, synchronize] + - 'branch-*' env: # Disable keepAlive and pool # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080 MAVEN_OPTS: >- - -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.count=3 + MAVEN_ARGS: >- + -B --no-transfer-progress ZEPPELIN_HELIUM_REGISTRY: helium SPARK_PRINT_LAUNCH_COMMAND: "true" SPARK_LOCAL_IP: 127.0.0.1 ZEPPELIN_LOCAL_IP: 127.0.0.1 - INTERPRETERS: '!beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb' + INTERPRETERS: '!hbase,!jdbc,!file,!flink,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!java,!neo4j,!sparql,!mongodb' + +permissions: + contents: read # to fetch code (actions/checkout) jobs: run-e2e-tests-in-zeppelin-web: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: 11 - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - name: Install application - run: mvn -B install -DskipTests -DskipRat -pl ${INTERPRETERS} -Phadoop2 -Pscala-2.11 + run: ./mvnw clean install -DskipTests -am -pl zeppelin-web -Pweb-classic -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist ${MAVEN_ARGS} - name: Run headless test - run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" mvn verify -DskipRat -pl zeppelin-web -Phadoop2 -Pscala-2.11 -Pweb-e2e -B + run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw verify -pl zeppelin-web -Pweb-classic -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist -Pweb-e2e ${MAVEN_ARGS} + - name: Print zeppelin logs + if: always() + run: if [ -d 
"logs" ]; then cat logs/*; fi + run-tests-in-zeppelin-web-angular: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: 11 - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - name: Run headless test - run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" mvn package -DskipRat -pl zeppelin-web-angular -Pweb-angular -B + run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw package -pl zeppelin-web-angular ${MAVEN_ARGS} - test-selenium-with-spark-module-for-spark-2-3: + test-selenium-with-spark-module-for-spark-3-4: runs-on: ubuntu-20.04 defaults: run: shell: bash -l {0} steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v4 with: - distribution: 'adopt' - java-version: 8 + distribution: 'temurin' + java-version: 11 - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.m2/repository !~/.m2/repository/org/apache/zeppelin/ + ~/.spark-dist + ~/.cache key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 + - name: Setup conda environment with python 3.9 and R + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: python_3_with_R environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + python-version: 3.9 + miniforge-variant: Mambaforge + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment + - name: Install Environment run: | - mvn clean install -DskipTests -DskipRat -pl ${INTERPRETERS} -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B - mvn clean package -pl zeppelin-plugins -amd -B - ./testing/downloadSpark.sh "2.3.2" "2.6" + ./mvnw clean install -DskipTests -am -pl zeppelin-integration -Pweb-classic -Pintegration -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist ${MAVEN_ARGS} - name: run tests - run: xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" mvn verify -DskipRat -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B -pl zeppelin-integration -DfailIfNoTests=false + run: | + xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" ./mvnw verify -DfailIfNoTests=false -pl zeppelin-integration -Pweb-classic -Pintegration -Pspark-scala-2.12 -Pspark-3.4 -Phadoop3 -Pweb-dist -Pusing-source-tree ${MAVEN_ARGS} + - name: Print zeppelin logs + if: always() + run: if [ -d "logs" ]; then cat logs/*; fi diff --git a/.github/workflows/quick.yml b/.github/workflows/quick.yml new file mode 100644 index 00000000000..b26f015c6b4 --- /dev/null +++ b/.github/workflows/quick.yml 
@@ -0,0 +1,54 @@ +name: quick + +on: + push: + branches-ignore: + - 'dependabot/**' + pull_request: + branches: + - master + - 'branch-*' + +permissions: + contents: read + +env: + # Disable keepAlive and pool + # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080 + MAVEN_OPTS: >- + -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit + -Dhttp.keepAlive=false + -Dmaven.wagon.http.pool=false + -Dmaven.wagon.http.retryHandler.count=3 + MAVEN_ARGS: >- + -B --no-transfer-progress + +jobs: + license-check: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + - name: Check Rat + run: ./mvnw apache-rat:check -Prat ${MAVEN_ARGS} + maven-validate: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + hadoop: [hadoop3] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 11 + - name: Run Maven Validate + run: ./mvnw validate -P${{ matrix.hadoop }} -Pinclude-hadoop ${MAVEN_ARGS} diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml deleted file mode 100644 index b32cb598e86..00000000000 --- a/.github/workflows/rat.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: rat -on: - push: - pull_request: - branches: - - master - - branch-* - types: [opened, synchronize] - -jobs: - license-check: - runs-on: ubuntu-20.04 - env: - # Disable keepAlive and pool - # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080 - MAVEN_OPTS: >- - -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn - -Dhttp.keepAlive=false - -Dmaven.wagon.http.pool=false - -Dmaven.wagon.http.retryHandler.count=3 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - distribution: 'adopt' - java-version: 8 - - name: Check Rat - run: mvn apache-rat:check -Prat -B diff --git a/.gitignore b/.gitignore index 618075d1de7..61ae7eb6a67 100644 --- a/.gitignore +++ b/.gitignore @@ -10,13 +10,16 @@ /interpreter/* !/interpreter/lib +# metals +.bloop +.metals + # interpreter temp files derby.log spark/metastore_db spark-1.*-bin-hadoop* .spark-dist -lens/lens-cli-hist.log # Zeppelin server zeppelin-server/local-repo @@ -31,7 +34,6 @@ conf/keystore conf/truststore conf/interpreter.json conf/notebook-authorization.json -conf/shiro.ini conf/credentials.json conf/helium.json @@ -56,8 +58,6 @@ zeppelin-web/yarn.lock .Rhistory /R/ -# scio -.bigquery/ # project level /logs/ @@ -70,6 +70,7 @@ zeppelin-web/yarn.lock /warehouse/ /notebook/ /local-repo/ +/notebook_*/ **/sessions/ **/data/ @@ -100,6 +101,12 @@ Thumbs.db .idea/ *.iml +# Jetbrains Fleet project files +.fleet/ + +# vscode project files +.vscode/ + # maven target files target/ **/target/ @@ -107,7 +114,7 @@ target/ # maven flattened pom files **/.flattened-pom.xml -# Generated by Jekyll +# Generated by Jekyll docs/_site/ *~ @@ -129,3 +136,9 @@ tramp # Git properties **/git.properties + +# jEnv file +.java-version + +# pyenv file +.python-version diff --git a/.mvn/wrapper/MavenWrapperDownloader.java b/.mvn/wrapper/MavenWrapperDownloader.java new file mode 100644 index 00000000000..b901097f2db --- /dev/null +++ b/.mvn/wrapper/MavenWrapperDownloader.java @@ -0,0 +1,117 @@ +/* + * Copyright 2007-present 
the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.net.*; +import java.io.*; +import java.nio.channels.*; +import java.util.Properties; + +public class MavenWrapperDownloader { + + private static final String WRAPPER_VERSION = "0.5.6"; + /** + * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. + */ + private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/" + + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; + + /** + * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to + * use instead of the default one. + */ + private static final String MAVEN_WRAPPER_PROPERTIES_PATH = + ".mvn/wrapper/maven-wrapper.properties"; + + /** + * Path where the maven-wrapper.jar will be saved to. + */ + private static final String MAVEN_WRAPPER_JAR_PATH = + ".mvn/wrapper/maven-wrapper.jar"; + + /** + * Name of the property which should be used to override the default download url for the wrapper. + */ + private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; + + public static void main(String args[]) { + System.out.println("- Downloader started"); + File baseDirectory = new File(args[0]); + System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); + + // If the maven-wrapper.properties exists, read it and check if it contains a custom + // wrapperUrl parameter. + File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); + String url = DEFAULT_DOWNLOAD_URL; + if(mavenWrapperPropertyFile.exists()) { + FileInputStream mavenWrapperPropertyFileInputStream = null; + try { + mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); + Properties mavenWrapperProperties = new Properties(); + mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); + url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); + } catch (IOException e) { + System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); + } finally { + try { + if(mavenWrapperPropertyFileInputStream != null) { + mavenWrapperPropertyFileInputStream.close(); + } + } catch (IOException e) { + // Ignore ... 
+ } + } + } + System.out.println("- Downloading from: " + url); + + File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); + if(!outputFile.getParentFile().exists()) { + if(!outputFile.getParentFile().mkdirs()) { + System.out.println( + "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); + } + } + System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); + try { + downloadFileFromURL(url, outputFile); + System.out.println("Done"); + System.exit(0); + } catch (Throwable e) { + System.out.println("- Error downloading"); + e.printStackTrace(); + System.exit(1); + } + } + + private static void downloadFileFromURL(String urlString, File destination) throws Exception { + if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) { + String username = System.getenv("MVNW_USERNAME"); + char[] password = System.getenv("MVNW_PASSWORD").toCharArray(); + Authenticator.setDefault(new Authenticator() { + @Override + protected PasswordAuthentication getPasswordAuthentication() { + return new PasswordAuthentication(username, password); + } + }); + } + URL website = new URL(urlString); + ReadableByteChannel rbc; + rbc = Channels.newChannel(website.openStream()); + FileOutputStream fos = new FileOutputStream(destination); + fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); + fos.close(); + rbc.close(); + } + +} diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 00000000000..2cc7d4a55c0 Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ diff --git a/scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh b/.mvn/wrapper/maven-wrapper.properties similarity index 78% rename from scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh rename to .mvn/wrapper/maven-wrapper.properties index 0eccdf4596b..7e83fe97d7d 100644 --- a/scripts/docker/submarine/1.0.0/zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2/build.sh +++ b/.mvn/wrapper/maven-wrapper.properties @@ -1,4 +1,3 @@ -#!/bin/bash # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -16,4 +15,5 @@ # limitations under the License. # -docker build -t zeppelin-cpu-tensorflow_1.13.1-hadoop_3.1.2:1.0.0 . +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar diff --git a/Dockerfile b/Dockerfile index 28606ea4cfb..6f1777e0862 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,19 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # -FROM maven:3.5-jdk-8 as builder +FROM openjdk:11 as builder ADD . 
/workspace/zeppelin WORKDIR /workspace/zeppelin +ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" # Allow npm and bower to run with root privileges RUN echo "unsafe-perm=true" > ~/.npmrc && \ echo '{ "allow_root": true }' > ~/.bowerrc && \ - mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular && \ + ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist && \ # Example with doesn't compile all interpreters - # mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!pig,!file,!flink,!ignite,!kylin,!lens' && \ - mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \ + # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist -pl '!groovy,!livy,!hbase,!file,!flink' && \ + mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*-bin/zeppelin-*-bin /opt/zeppelin/ && \ # Removing stuff saves time, because docker creates a temporary layer rm -rf ~/.m2 && \ rm -rf /workspace/zeppelin/* -FROM ubuntu:20.04 +FROM ubuntu:22.04 COPY --from=builder /opt/zeppelin /opt/zeppelin diff --git a/LICENSE b/LICENSE index 7f759130ca3..2efdc5dbb05 100644 --- a/LICENSE +++ b/LICENSE @@ -265,7 +265,6 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (Apache 2.0) Google Cloud Client Library for Java (https://github.com/GoogleCloudPlatform/google-cloud-java) (Apache 2.0) concurrentunit (https://github.com/jhalterman/concurrentunit) (Apache 2.0) Embedded MongoDB (https://github.com/flapdoodle-oss/de.flapdoodle.embed.mongo) - (Apache 2.0) Kotlin (https://github.com/JetBrains/kotlin) (Apache 2.0) s3proxy (https://github.com/gaul/s3proxy) (Apache 2.0) kubernetes-client (https://github.com/fabric8io/kubernetes-client) diff --git a/NOTICE b/NOTICE index 52de5bfc89f..63e12dbd6c8 100644 --- a/NOTICE +++ b/NOTICE @@ -1,8 +1,8 @@ Apache Zeppelin -Copyright 2015 - 2016 The Apache Software Foundation +Copyright 2015 - 2024 The Apache Software Foundation This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). +The Apache Software Foundation (https://www.apache.org/). Portions of this software were developed at NFLabs, Inc. (http://www.nflabs.com) diff --git a/STYLE.md b/STYLE.md index 8182301b5a5..b0a5f2a084a 100644 --- a/STYLE.md +++ b/STYLE.md @@ -7,7 +7,7 @@ app/styles/looknfeel Overall look and theme of the Zeppelin notebook page can be customized here. ### Code Syntax Highlighting -There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls pegdown parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js. +There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls flexmark parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js. 
#### Theme on Ace Editor app/scripts/controllers/paragraph.js @@ -16,7 +16,7 @@ Call setTheme on the editor with the theme path/name. [List of themes on GitHub](https://github.com/ajaxorg/ace/tree/master/lib/ace/theme) #### Style for Markdown Code Blocks -Highlight.js parses and converts <pre><code> blocks from pegdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build. +Highlight.js parses and converts <pre><code> blocks from markdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build. Note that code block background color is overriden in app/styles/notebook.css (look for .paragraph .tableDisplay .hljs). diff --git a/_tools/maven-4.0.0.xsd b/_tools/maven-4.0.0.xsd deleted file mode 100644 index f3a36834a2c..00000000000 --- a/_tools/maven-4.0.0.xsd +++ /dev/null @@ -1,2484 +0,0 @@ - - - - - - - - - 3.0.0+ - - - The <code>&lt;project&gt;</code> element is the root of the descriptor. - The following table lists all of the possible child elements. - - - - - - - 3.0.0+ - - - The <code>&lt;project&gt;</code> element is the root of the descriptor. - The following table lists all of the possible child elements. - - - - - - - 4.0.0+ - Declares to which version of project descriptor this POM conforms. - - - - - 4.0.0+ - The location of the parent project, if one exists. Values from the parent - project will be the default for this project if they are left unspecified. The location - is given as a group ID, artifact ID and version. - - - - - 3.0.0+ - - - A universally unique identifier for a project. It is normal to - use a fully-qualified package name to distinguish it from other - projects with a similar name (eg. <code>org.apache.maven</code>). - - - - - - - 3.0.0+ - The identifier for this artifact that is unique within the group given by the - group ID. An artifact is something that is either produced or used by a project. - Examples of artifacts produced by Maven for a project include: JARs, source and binary - distributions, and WARs. - - - - - 4.0.0+ - The current version of the artifact produced by this project. - - - - - 4.0.0+ - - - The type of artifact this project produces, for example <code>jar</code> - <code>war</code> - <code>ear</code> - <code>pom</code>. - Plugins can create their own packaging, and - therefore their own packaging types, - so this list does not contain all possible types. - - - - - - - 3.0.0+ - The full name of the project. - - - - - 3.0.0+ - A detailed description of the project, used by Maven whenever it needs to - describe the project, such as on the web site. While this element can be specified as - CDATA to enable the use of HTML tags within the description, it is discouraged to allow - plain text representation. If you need to modify the index page of the generated web - site, you are able to specify your own instead of adjusting this text. - - - - - 3.0.0+ - - - The URL to the project's homepage. 
- <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId - - - - - - - 3.0.0+ - The year of the project's inception, specified with 4 digits. This value is - used when generating copyright notices as well as being informational. - - - - - 3.0.0+ - This element describes various attributes of the organization to which the - project belongs. These attributes are utilized when documentation is created (for - copyright notices and links). - - - - - 3.0.0+ - - - This element describes all of the licenses for this project. - Each license is described by a <code>license</code> element, which - is then described by additional elements. - Projects should only list the license(s) that applies to the project - and not the licenses that apply to dependencies. - If multiple licenses are listed, it is assumed that the user can select - any of them, not that they must accept all. - - - - - - - - - - - - 3.0.0+ - Describes the committers of a project. - - - - - - - - - - 3.0.0+ - Describes the contributors to a project that are not yet committers. - - - - - - - - - - 3.0.0+ - Contains information about a project's mailing lists. - - - - - - - - - - 4.0.0+ - Describes the prerequisites in the build environment for this project. - - - - - 4.0.0+ - The modules (sometimes called subprojects) to build as a part of this - project. Each module listed is a relative path to the directory containing the module. - To be consistent with the way default urls are calculated from parent, it is recommended - to have module names match artifact ids. - - - - - - - - - - 4.0.0+ - Specification for the SCM used by the project, such as CVS, Subversion, etc. - - - - - 4.0.0+ - The project's issue management system information. - - - - - 4.0.0+ - The project's continuous integration information. - - - - - 4.0.0+ - Distribution information for a project that enables deployment of the site - and artifacts to remote web servers and repositories respectively. - - - - - 4.0.0+ - - - Properties that can be used throughout the POM as a substitution, and - are used as filters in resources if enabled. - The format is <code>&lt;name&gt;value&lt;/name&gt;</code>. - - - - - - - - - - - - 4.0.0+ - Default dependency information for projects that inherit from this one. The - dependencies in this section are not immediately resolved. Instead, when a POM derived - from this one declares a dependency described by a matching groupId and artifactId, the - version and other values from this section are used for that dependency if they were not - already specified. - - - - - 3.0.0+ - - - This element describes all of the dependencies associated with a - project. - These dependencies are used to construct a classpath for your - project during the build process. They are automatically downloaded from the - repositories defined in this project. - See <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the - dependency mechanism</a> for more information. - - - - - - - - - - - - 4.0.0+ - The lists of the remote repositories for discovering dependencies and - extensions. - - - - - - - - - - 4.0.0+ - The lists of the remote repositories for discovering plugins for builds and - reports. - - - - - - - - - - 3.0.0+ - Information required to build the project. - - - - - 4.0.0+ - - - <b>Deprecated</b>. Now ignored by Maven. - - - - - - - - - - - - 4.0.0+ - - - This element includes the specification of report plugins to use - to generate the reports on the Maven-generated site. 
- These reports will be run when a user executes <code>mvn site</code>. - All of the reports will be included in the navigation bar for browsing. - - - - - - - 4.0.0+ - A listing of project-local build profiles which will modify the build process - when activated. - - - - - - - - - - - - 4.0.0+ - - - The <code>&lt;parent&gt;</code> element contains information required to locate the parent project from which - this project will inherit from. - <strong>Note:</strong> The children of this element are not interpolated and must be given as literal values. - - - - - - - 4.0.0+ - The group id of the parent project to inherit from. - - - - - 4.0.0+ - The artifact id of the parent project to inherit from. - - - - - 4.0.0+ - The version of the parent project to inherit. - - - - - 4.0.0+ - - - The relative path of the parent <code>pom.xml</code> file within the check out. - If not specified, it defaults to <code>../pom.xml</code>. - Maven looks for the parent POM first in this location on - the filesystem, then the local repository, and lastly in the remote repo. - <code>relativePath</code> allows you to select a different location, - for example when your structure is flat, or deeper without an intermediate parent POM. - However, the group ID, artifact ID and version are still required, - and must match the file in the location given or it will revert to the repository for the POM. - This feature is only for enhancing the development in a local checkout of that project. - Set the value to an empty string in case you want to disable the feature and always resolve - the parent POM from the repositories. - - - - - - - - - 3.0.0+ - Specifies the organization that produces this project. - - - - - 3.0.0+ - The full name of the organization. - - - - - 3.0.0+ - The URL to the organization's home page. - - - - - - - 4.0.0+ - This elements describes all that pertains to distribution for a project. It is - primarily used for deployment of artifacts and the site produced by the build. - - - - - 4.0.0+ - Information needed to deploy the artifacts generated by the project to a - remote repository. - - - - - 4.0.0+ - - - Where to deploy snapshots of artifacts to. If not given, it defaults to the - <code>repository</code> element. - - - - - - - 4.0.0+ - Information needed for deploying the web site of the project. - - - - - 4.0.0+ - - - The URL of the project's download page. If not given users will be - referred to the homepage given by <code>url</code>. - This is given to assist in locating artifacts that are not in the repository due to - licensing restrictions. - - - - - - - 4.0.0+ - Relocation information of the artifact if it has been moved to a new group ID - and/or artifact ID. - - - - - 4.0.0+ - - - Gives the status of this artifact in the remote repository. - This must not be set in your local project, as it is updated by - tools placing it in the reposiory. Valid values are: <code>none</code> (default), - <code>converted</code> (repository manager converted this from an Maven 1 POM), - <code>partner</code> - (directly synced from a partner Maven 2 repository), <code>deployed</code> (was deployed from a Maven 2 - instance), <code>verified</code> (has been hand verified as correct and final). - - - - - - - - - 4.0.0+ - Describes where an artifact has moved to. If any of the values are omitted, it is - assumed to be the same as it was before. - - - - - 4.0.0+ - The group ID the artifact has moved to. - - - - - 4.0.0+ - The new artifact ID of the artifact. - - - - - 4.0.0+ - The new version of the artifact. 
- - - - - 4.0.0+ - An additional message to show the user about the move, such as the reason. - - - - - - - 4.0.0+ - Contains the information needed for deploying websites. - - - - - 4.0.0+ - - - A unique identifier for a deployment location. This is used to match the - site to configuration in the <code>settings.xml</code> file, for example. - - - - - - - 4.0.0+ - Human readable name of the deployment location. - - - - - 4.0.0+ - - - The url of the location where website is deployed, in the form <code>protocol://hostname/path</code>. - <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId - - - - - - - - - 4.0.0+ - Repository contains the information needed for deploying to the remote - repository. - - - - - 4.0.0+ - Whether to assign snapshots a unique version comprised of the timestamp and - build number, or to use the same version each time - - - - - 4.0.0+ - How to handle downloading of releases from this repository. - - - - - 4.0.0+ - How to handle downloading of snapshots from this repository. - - - - - 4.0.0+ - - - A unique identifier for a repository. This is used to match the repository - to configuration in the <code>settings.xml</code> file, for example. Furthermore, the identifier is - used during POM inheritance and profile injection to detect repositories that should be merged. - - - - - - - 4.0.0+ - Human readable name of the repository. - - - - - 4.0.0+ - - - The url of the repository, in the form <code>protocol://hostname/path</code>. - - - - - - - 4.0.0+ - - - The type of layout this repository uses for locating and storing artifacts - - can be <code>legacy</code> or <code>default</code>. - - - - - - - - - 4.0.0+ - Download policy. - - - - - 4.0.0+ - - - Whether to use this repository for downloading this type of artifact. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>true</code>. - - - - - - - 4.0.0+ - - - The frequency for downloading updates - can be - <code>always,</code> - <code>daily</code> - (default), - <code>interval:XXX</code> - (in minutes) or - <code>never</code> - (only if it doesn't exist locally). - - - - - - - 4.0.0+ - - - What to do when verification of an artifact checksum fails. Valid values are - <code>ignore</code> - , - <code>fail</code> - or - <code>warn</code> - (the default). - - - - - - - - - 4.0.0+ - Describes the prerequisites a project can have. - - - - - 4.0.0+ - - For a plugin project, the minimum version of Maven required to use - the resulting plugin.<br /> - For specifying the minimum version of Maven required to build a - project, this element is <b>deprecated</b>. Use the Maven Enforcer - Plugin's <a href="https://maven.apache.org/enforcer/enforcer-rules/requireMavenVersion.html"><code>requireMavenVersion</code></a> - rule instead. - - - - - - - - - 3.0.0+ - Description of a person who has contributed to the project, but who does not have - commit privileges. Usually, these contributions come in the form of patches submitted. - - - - - 3.0.0+ - The full name of the contributor. - - - - - 3.0.0+ - The email address of the contributor. - - - - - 3.0.0+ - The URL for the homepage of the contributor. - - - - - 3.0.0+ - The organization to which the contributor belongs. - - - - - 3.0.0+ - The URL of the organization. - - - - - 3.0.0+ - - - The roles the contributor plays in the project. Each role is described by a - <code>role</code> element, the body of which is a role name. 
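Because the prerequisites element above is deprecated for declaring a minimum Maven version, a hedged sketch of the recommended replacement follows, using the Maven Enforcer Plugin's requireMavenVersion rule; the 3.6.3 lower bound is an illustrative assumption, not a value stated anywhere in this document.

    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-enforcer-plugin</artifactId>
      <executions>
        <execution>
          <id>enforce-maven</id>
          <goals>
            <goal>enforce</goal>
          </goals>
          <configuration>
            <rules>
              <!-- build-time check replacing the deprecated <prerequisites> element -->
              <requireMavenVersion>
                <version>3.6.3</version>
              </requireMavenVersion>
            </rules>
          </configuration>
        </execution>
      </executions>
    </plugin>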
This can also be used to - describe the contribution. - - - - - - - - - - - - 3.0.0+ - - - The timezone the contributor is in. Typically, this is a number in the range - <a href="http://en.wikipedia.org/wiki/UTC%E2%88%9212:00">-12</a> to <a href="http://en.wikipedia.org/wiki/UTC%2B14:00">+14</a> - or a valid time zone id like "America/Montreal" (UTC-05:00) or "Europe/Paris" (UTC+01:00). - - - - - - - 3.0.0+ - Properties about the contributor, such as an instant messenger handle. - - - - - - - - - - - - 4.0.0+ - - - The <code>&lt;scm&gt;</code> element contains informations required to the SCM - (Source Control Management) of the project. - - - - - - - 4.0.0+ - - - The source control management system URL - that describes the repository and how to connect to the - repository. For more information, see the - <a href="http://maven.apache.org/scm/scm-url-format.html">URL format</a> - and <a href="http://maven.apache.org/scm/scms-overview.html">list of supported SCMs</a>. - This connection is read-only. - <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId - - - - - - - 4.0.0+ - - - Just like <code>connection</code>, but for developers, i.e. this scm connection - will not be read only. - <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId - - - - - - - 4.0.0+ - The tag of current code. By default, it's set to HEAD during development. - - - - - 4.0.0+ - - - The URL to the project's browsable SCM repository, such as ViewVC or Fisheye. - <br /><b>Default value is</b>: parent value [+ path adjustment] + artifactId - - - - - - - - - 4.0.0+ - A repository contains the information needed for establishing connections with - remote repository. - - - - - 4.0.0+ - How to handle downloading of releases from this repository. - - - - - 4.0.0+ - How to handle downloading of snapshots from this repository. - - - - - 4.0.0+ - - - A unique identifier for a repository. This is used to match the repository - to configuration in the <code>settings.xml</code> file, for example. Furthermore, the identifier is - used during POM inheritance and profile injection to detect repositories that should be merged. - - - - - - - 4.0.0+ - Human readable name of the repository. - - - - - 4.0.0+ - - - The url of the repository, in the form <code>protocol://hostname/path</code>. - - - - - - - 4.0.0+ - - - The type of layout this repository uses for locating and storing artifacts - - can be <code>legacy</code> or <code>default</code>. - - - - - - - - - 4.0.0+ - Information about the issue tracking (or bug tracking) system used to manage this - project. - - - - - 4.0.0+ - The name of the issue management system, e.g. Bugzilla - - - - - 4.0.0+ - URL for the issue management system used by the project. - - - - - - - 4.0.0+ - - - The <code>&lt;CiManagement&gt;</code> element contains informations required to the - continuous integration system of the project. - - - - - - - 4.0.0+ - - - The name of the continuous integration system, e.g. <code>continuum</code>. - - - - - - - 4.0.0+ - URL for the continuous integration system used by the project if it has a web - interface. - - - - - 4.0.0+ - Configuration for notifying developers/users when a build is unsuccessful, - including user information and notification mode. - - - - - - - - - - - - 4.0.0+ - Configures one method for notifying users/developers when a build breaks. - - - - - 4.0.0+ - The mechanism used to deliver notifications. - - - - - 4.0.0+ - Whether to send notifications on error. 
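As an illustration of the scm fields described above, a minimal sketch with placeholder URLs; the scm:git prefix assumes a Git-backed project, and tag is left at the documented development default of HEAD.

    <scm>
      <connection>scm:git:https://example.org/repos/project.git</connection>
      <developerConnection>scm:git:https://example.org/repos/project.git</developerConnection>
      <tag>HEAD</tag>
      <url>https://example.org/repos/project</url>
    </scm>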
- - - - - 4.0.0+ - Whether to send notifications on failure. - - - - - 4.0.0+ - Whether to send notifications on success. - - - - - 4.0.0+ - Whether to send notifications on warning. - - - - - 4.0.0+ - - - <b>Deprecated</b>. Where to send the notification to - eg email address. - - - - - - - 0.0.0+ - Extended configuration specific to this notifier goes here. - - - - - - - - - - - - 4.0.0+ - Modifications to the build process which is activated based on environmental - parameters or command line arguments. - - - - - 4.0.0+ - The identifier of this build profile. This is used for command line - activation, and identifies profiles to be merged. - - - - - - 4.0.0+ - The conditional logic which will automatically trigger the inclusion of this - profile. - - - - - 4.0.0+ - Information required to build the project. - - - - - 4.0.0+ - The modules (sometimes called subprojects) to build as a part of this - project. Each module listed is a relative path to the directory containing the module. - To be consistent with the way default urls are calculated from parent, it is recommended - to have module names match artifact ids. - - - - - - - - - - 4.0.0+ - Distribution information for a project that enables deployment of the site - and artifacts to remote web servers and repositories respectively. - - - - - 4.0.0+ - - - Properties that can be used throughout the POM as a substitution, and - are used as filters in resources if enabled. - The format is <code>&lt;name&gt;value&lt;/name&gt;</code>. - - - - - - - - - - - - 4.0.0+ - Default dependency information for projects that inherit from this one. The - dependencies in this section are not immediately resolved. Instead, when a POM derived - from this one declares a dependency described by a matching groupId and artifactId, the - version and other values from this section are used for that dependency if they were not - already specified. - - - - - 3.0.0+ - - - This element describes all of the dependencies associated with a - project. - These dependencies are used to construct a classpath for your - project during the build process. They are automatically downloaded from the - repositories defined in this project. - See <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the - dependency mechanism</a> for more information. - - - - - - - - - - - - 4.0.0+ - The lists of the remote repositories for discovering dependencies and - extensions. - - - - - - - - - - 4.0.0+ - The lists of the remote repositories for discovering plugins for builds and - reports. - - - - - - - - - - 4.0.0+ - - - <b>Deprecated</b>. Now ignored by Maven. - - - - - - - - - - - - 4.0.0+ - - - This element includes the specification of report plugins to use - to generate the reports on the Maven-generated site. - These reports will be run when a user executes <code>mvn site</code>. - All of the reports will be included in the navigation bar for browsing. - - - - - - - - - 3.0.0+ - Generic informations for a build. - - - - - 3.0.0+ - The default goal (or phase in Maven 2) to execute when none is specified for - the project. Note that in case of a multi-module build, only the default goal of the top-level - project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3, - multiple goals/phases can be separated by whitespace. - - - - - 3.0.0+ - - This element describes all of the classpath resources such as properties - files associated with a project. These resources are often included in the final - package. 
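Pulling the ciManagement and notifier fields above together, a minimal sketch; the system name, URL and mail address are placeholders, and placing the address inside configuration is an assumption for a mail-type notifier, since the top-level address field is documented as deprecated.

    <ciManagement>
      <system>jenkins</system>
      <url>https://ci.example.org/job/project/</url>
      <notifiers>
        <notifier>
          <type>mail</type>
          <sendOnError>true</sendOnError>
          <sendOnFailure>true</sendOnFailure>
          <sendOnSuccess>false</sendOnSuccess>
          <sendOnWarning>false</sendOnWarning>
          <configuration>
            <!-- notifier-specific settings; the address key is assumed for a mail notifier -->
            <address>builds@example.org</address>
          </configuration>
        </notifier>
      </notifiers>
    </ciManagement>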
- The default value is <code>src/main/resources</code>. - - - - - - - - - - - 4.0.0+ - - This element describes all of the classpath resources such as properties - files associated with a project's unit tests. - The default value is <code>src/test/resources</code>. - - - - - - - - - - - 4.0.0+ - - The directory where all files generated by the build are placed. - The default value is <code>target</code>. - - - - - - 4.0.0+ - - - The filename (excluding the extension, and with no path information) that - the produced artifact will be called. - The default value is <code>${artifactId}-${version}</code>. - - - - - - - 4.0.0+ - The list of filter properties files that are used when filtering is enabled. - - - - - - - - - - 4.0.0+ - Default plugin information to be made available for reference by projects - derived from this one. This plugin configuration will not be resolved or bound to the - lifecycle unless referenced. Any local configuration for a given plugin will override - the plugin's entire definition here. - - - - - 4.0.0+ - The list of plugins to use. - - - - - - - - - - - - 4.0.0+ - - - The <code>&lt;plugin&gt;</code> element contains informations required for a plugin. - - - - - - - 4.0.0+ - The group ID of the plugin in the repository. - - - - - 4.0.0+ - The artifact ID of the plugin in the repository. - - - - - 4.0.0+ - The version (or valid range of versions) of the plugin to be used. - - - - - 4.0.0+ - - - Whether to load Maven extensions (such as packaging and type handlers) from - this plugin. For performance reasons, this should only be enabled when necessary. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>false</code>. - - - - - - - 4.0.0+ - Multiple specifications of a set of goals to execute during the build - lifecycle, each having (possibly) a different configuration. - - - - - - - - - - 4.0.0+ - Additional dependencies that this project needs to introduce to the plugin's - classloader. - - - - - - - - - - 4.0.0+ - - - <b>Deprecated</b>. Unused by Maven. - - - - - - - - - - - - 4.0.0+ - - - Whether any configuration should be propagated to child POMs. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>true</code>. - - - - - - - 0.0.0+ - - - <p>The configuration as DOM object.</p> - <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add - <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> - <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> - or <code>combine.self</code> attributes to the children of the configuration element:</p> - <ul> - <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> - <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> - </ul> - <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and - <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> - for more information.</p> - - - - - - - - - - - - - - 3.0.0+ - - - The <code>&lt;dependency&gt;</code> element contains information about a dependency - of the project. 
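The combine.children and combine.self attributes described above are easiest to see side by side; this is a hedged sketch using made-up configuration element names (items/item) rather than any real plugin's schema.

    <!-- parent POM -->
    <configuration>
      <items>
        <item>parent-value</item>
      </items>
    </configuration>

    <!-- child POM: append to the inherited list instead of replacing it -->
    <configuration>
      <items combine.children="append">
        <item>child-value</item>
      </items>
    </configuration>

    <!-- child POM: ignore the inherited configuration entirely -->
    <configuration combine.self="override">
      <items>
        <item>child-only-value</item>
      </items>
    </configuration>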
- - - - - - - 3.0.0+ - - - The project group that produced the dependency, e.g. - <code>org.apache.maven</code>. - - - - - - - 3.0.0+ - - - The unique id for an artifact produced by the project group, e.g. - <code>maven-artifact</code>. - - - - - - - 3.0.0+ - - - The version of the dependency, e.g. <code>3.2.1</code>. In Maven 2, this can also be - specified as a range of versions. - - - - - - - 4.0.0+ - - - The type of dependency. While it - usually represents the extension on the filename of the dependency, - that is not always the case. A type can be mapped to a different - extension and a classifier. - The type often corresponds to the packaging used, though this is also - not always the case. - Some examples are <code>jar</code>, <code>war</code>, <code>ejb-client</code> - and <code>test-jar</code>: see <a href="../maven-core/artifact-handlers.html">default - artifact handlers</a> for a list. - New types can be defined by plugins that set - <code>extensions</code> to <code>true</code>, so this is not a complete list. - - - - - - - 4.0.0+ - - - The classifier of the dependency. It is appended to - the filename after the version. This allows: - <ul> - <li>refering to attached artifact, for example <code>sources</code> and <code>javadoc</code>: - see <a href="../maven-core/artifact-handlers.html">default artifact handlers</a> for a list,</li> - <li>distinguishing two artifacts - that belong to the same POM but were built differently. - For example, <code>jdk14</code> and <code>jdk15</code>.</li> - </ul> - - - - - - - 4.0.0+ - - - The scope of the dependency - <code>compile</code>, <code>runtime</code>, - <code>test</code>, <code>system</code>, and <code>provided</code>. Used to - calculate the various classpaths used for compilation, testing, and so on. - It also assists in determining which artifacts to include in a distribution of - this project. For more information, see - <a href="http://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html">the - dependency mechanism</a>. - - - - - - - 4.0.0+ - - - FOR SYSTEM SCOPE ONLY. Note that use of this property is <b>discouraged</b> - and may be replaced in later versions. This specifies the path on the filesystem - for this dependency. - Requires an absolute path for the value, not relative. - Use a property that gives the machine specific absolute path, - e.g. <code>${java.home}</code>. - - - - - - - 4.0.0+ - Lists a set of artifacts that should be excluded from this dependency's - artifact list when it comes to calculating transitive dependencies. - - - - - - - - - - 4.0.0+ - - - Indicates the dependency is optional for use of this library. While the - version of the dependency will be taken into account for dependency calculation if the - library is used elsewhere, it will not be passed on transitively. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>false</code>. - - - - - - - - - 4.0.0+ - - - The <code>&lt;exclusion&gt;</code> element contains informations required to exclude - an artifact to the project. - - - - - - - 4.0.0+ - The artifact ID of the project to exclude. - - - - - 4.0.0+ - The group ID of the project to exclude. - - - - - - - 4.0.0+ - - - The <code>&lt;execution&gt;</code> element contains informations required for the - execution of a plugin. 
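The dependency fields above (version, scope, exclusions) are exercised later in this very diff by alluxio/pom.xml; a condensed sketch of that pattern, assuming the alluxio.version property is defined elsewhere in the POM:

    <dependency>
      <groupId>org.alluxio</groupId>
      <artifactId>alluxio-minicluster</artifactId>
      <version>${alluxio.version}</version>
      <scope>test</scope>
      <exclusions>
        <!-- keep the transitive Hadoop client off the test classpath -->
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
        </exclusion>
      </exclusions>
    </dependency>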
- - - - - - - 4.0.0+ - The identifier of this execution for labelling the goals during the build, - and for matching executions to merge during inheritance and profile injection. - - - - - 4.0.0+ - The build lifecycle phase to bind the goals in this execution to. If omitted, - the goals will be bound to the default phase specified by the plugin. - - - - - 4.0.0+ - The goals to execute with the given configuration. - - - - - - - - - - 4.0.0+ - - - Whether any configuration should be propagated to child POMs. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>true</code>. - - - - - - - 0.0.0+ - - - <p>The configuration as DOM object.</p> - <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add - <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> - <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> - or <code>combine.self</code> attributes to the children of the configuration element:</p> - <ul> - <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> - <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> - </ul> - <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and - <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> - for more information.</p> - - - - - - - - - - - - - - 3.0.0+ - This element describes all of the classpath resources associated with a project - or unit tests. - - - - - 3.0.0+ - - - Describe the resource target path. The path is relative to the target/classes - directory (i.e. <code>${project.build.outputDirectory}</code>). - For example, if you want that resource to appear in a specific package - (<code>org.apache.maven.messages</code>), you must specify this - element with this value: <code>org/apache/maven/messages</code>. - This is not required if you simply put the resources in that directory - structure at the source, however. - - - - - - - 3.0.0+ - - - Whether resources are filtered to replace tokens with parameterised values or not. - The values are taken from the <code>properties</code> element and from the - properties in the files listed in the <code>filters</code> element. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>false</code>. - - - - - - - 3.0.0+ - Describe the directory where the resources are stored. The path is relative - to the POM. - - - - - 3.0.0+ - - - A list of patterns to include, e.g. <code>**&#47;*.xml</code>. - - - - - - - - - - - - 3.0.0+ - - - A list of patterns to exclude, e.g. <code>**&#47;*.xml</code> - - - - - - - - - - - - - - 4.0.0+ - Section for management of default plugin information for use in a group of POMs. - - - - - - 4.0.0+ - The list of plugins to use. - - - - - - - - - - - - 4.0.0+ - Section for management of reports and their configuration. - - - - - 4.0.0+ - - - If true, then the default reports are not included in the site generation. - This includes the reports in the "Project Info" menu. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. 
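To ground the resource fields above, a minimal sketch; the directory is the documented default location and the include pattern is the example given in the text.

    <build>
      <resources>
        <resource>
          <directory>src/main/resources</directory>
          <!-- replace ${...} tokens using values from <properties> and <filters> -->
          <filtering>true</filtering>
          <includes>
            <include>**/*.xml</include>
          </includes>
        </resource>
      </resources>
    </build>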
Default value is <code>false</code>. - - - - - - - 4.0.0+ - - - Where to store all of the generated reports. The default is - <code>${project.build.directory}/site</code>. - - - - - - - 4.0.0+ - The reporting plugins to use and their configuration. - - - - - - - - - - - - 4.0.0+ - - - The <code>&lt;plugin&gt;</code> element contains informations required for a report plugin. - - - - - - - 4.0.0+ - The group ID of the reporting plugin in the repository. - - - - - 4.0.0+ - The artifact ID of the reporting plugin in the repository. - - - - - 4.0.0+ - The version of the reporting plugin to be used. - - - - - 4.0.0+ - - - Multiple specifications of a set of reports, each having (possibly) different - configuration. This is the reporting parallel to an <code>execution</code> in the build. - - - - - - - - - - - - 4.0.0+ - - - Whether any configuration should be propagated to child POMs. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>true</code>. - - - - - - - 0.0.0+ - - - <p>The configuration as DOM object.</p> - <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add - <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> - <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> - or <code>combine.self</code> attributes to the children of the configuration element:</p> - <ul> - <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> - <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> - </ul> - <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and - <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> - for more information.</p> - - - - - - - - - - - - - - 4.0.0+ - Represents a set of reports and configuration to be used to generate them. - - - - - 0.0.0+ - The unique id for this report set, to be used during POM inheritance and profile injection - for merging of report sets. - - - - - - 4.0.0+ - The list of reports from this plugin which should be generated from this set. - - - - - - - - - - 4.0.0+ - - - Whether any configuration should be propagated to child POMs. Note: While the type - of this field is <code>String</code> for technical reasons, the semantic type is actually - <code>Boolean</code>. Default value is <code>true</code>. 
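A hedged sketch of the reporting section described above as run by mvn site; the javadoc plugin and its javadoc report are used purely as an illustration, and the outputDirectory simply restates the documented default.

    <reporting>
      <outputDirectory>${project.build.directory}/site</outputDirectory>
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-javadoc-plugin</artifactId>
          <reportSets>
            <reportSet>
              <id>html-only</id>
              <reports>
                <report>javadoc</report>
              </reports>
            </reportSet>
          </reportSets>
        </plugin>
      </plugins>
    </reporting>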
- - - - - - - 0.0.0+ - - - <p>The configuration as DOM object.</p> - <p>By default, every element content is trimmed, but starting with Maven 3.1.0, you can add - <code>xml:space="preserve"</code> to elements you want to preserve whitespace.</p> - <p>You can control how child POMs inherit configuration from parent POMs by adding <code>combine.children</code> - or <code>combine.self</code> attributes to the children of the configuration element:</p> - <ul> - <li><code>combine.children</code>: available values are <code>merge</code> (default) and <code>append</code>,</li> - <li><code>combine.self</code>: available values are <code>merge</code> (default) and <code>override</code>.</li> - </ul> - <p>See <a href="http://maven.apache.org/pom.html#Plugins">POM Reference documentation</a> and - <a href="http://plexus.codehaus.org/plexus-utils/apidocs/org/codehaus/plexus/util/xml/Xpp3DomUtils.html">Xpp3DomUtils</a> - for more information.</p> - - - - - - - - - - - - - - 4.0.0+ - The conditions within the build runtime environment which will trigger the - automatic inclusion of the build profile. Multiple conditions can be defined, which must - be all satisfied to activate the profile. - - - - - - 4.0.0+ - If set to true, this profile will be active unless another profile in this - pom is activated using the command line -P option or by one of that profile's - activators. - - - - - 4.0.0+ - - - Specifies that this profile will be activated when a matching JDK is detected. - For example, <code>1.4</code> only activates on JDKs versioned 1.4, - while <code>!1.4</code> matches any JDK that is not version 1.4. Ranges are supported too: - <code>[1.5,)</code> activates when the JDK is 1.5 minimum. - - - - - - - 4.0.0+ - Specifies that this profile will be activated when matching operating system - attributes are detected. - - - - - 4.0.0+ - Specifies that this profile will be activated when this system property is - specified. - - - - - 4.0.0+ - Specifies that this profile will be activated based on existence of a file. - - - - - - - 4.0.0+ - This is the property specification used to activate a profile. If the value field - is empty, then the existence of the named property will activate the profile, otherwise it - does a case-sensitive match against the property value as well. - - - - - 4.0.0+ - The name of the property to be used to activate a profile. - - - - - 4.0.0+ - The value of the property required to activate a profile. - - - - - - - 4.0.0+ - This is an activator which will detect an operating system's attributes in order - to activate its profile. - - - - - 4.0.0+ - - - The name of the operating system to be used to activate the profile. This must be an exact match - of the <code>${os.name}</code> Java property, such as <code>Windows XP</code>. - - - - - - - 4.0.0+ - - - The general family of the OS to be used to activate the profile, such as - <code>windows</code> or <code>unix</code>. - - - - - - - 4.0.0+ - The architecture of the operating system to be used to activate the - profile. - - - - - 4.0.0+ - The version of the operating system to be used to activate the - profile. - - - - - - - 4.0.0+ - This is the file specification used to activate the profile. The <code>missing</code> value - is the location of a file that needs to exist, and if it doesn't, the profile will be - activated. 
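The activation conditions above can be combined, and per the text all of them must hold for the profile to activate; in this sketch the profile id and the property name/value are illustrative assumptions, while the [1.5,) range syntax is taken directly from the description.

    <profile>
      <id>jdk-range-example</id>
      <activation>
        <!-- activates on JDK 1.5 or newer -->
        <jdk>[1.5,)</jdk>
        <property>
          <name>build.full</name>
          <value>true</value>
        </property>
      </activation>
    </profile>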
On the other hand, <code>exists</code> will test for the existence of the file and if it is - there, the profile will be activated.<br/> - Variable interpolation for these file specifications is limited to <code>${basedir}</code>, - System properties and request properties. - - - - - 4.0.0+ - The name of the file that must be missing to activate the - profile. - - - - - 4.0.0+ - The name of the file that must exist to activate the profile. - - - - - - - 4.0.0+ - Section for management of default dependency information for use in a group of - POMs. - - - - - 4.0.0+ - The dependencies specified here are not used until they are referenced in a - POM within the group. This allows the specification of a "standard" version for a - particular dependency. - - - - - - - - - - - - 3.0.0+ - - - The <code>&lt;build&gt;</code> element contains informations required to build the project. - Default values are defined in Super POM. - - - - - - - 3.0.0+ - - This element specifies a directory containing the source of the project. The - generated build system will compile the sources from this directory when the project is - built. The path given is relative to the project descriptor. - The default value is <code>src/main/java</code>. - - - - - - 4.0.0+ - - This element specifies a directory containing the script sources of the - project. This directory is meant to be different from the sourceDirectory, in that its - contents will be copied to the output directory in most cases (since scripts are - interpreted rather than compiled). - The default value is <code>src/main/scripts</code>. - - - - - - 4.0.0+ - - This element specifies a directory containing the unit test source of the - project. The generated build system will compile these directories when the project is - being tested. The path given is relative to the project descriptor. - The default value is <code>src/test/java</code>. - - - - - - 4.0.0+ - - The directory where compiled application classes are placed. - The default value is <code>target/classes</code>. - - - - - - 4.0.0+ - - The directory where compiled test classes are placed. - The default value is <code>target/test-classes</code>. - - - - - - 4.0.0+ - A set of build extensions to use from this project. - - - - - - - - - - 3.0.0+ - The default goal (or phase in Maven 2) to execute when none is specified for - the project. Note that in case of a multi-module build, only the default goal of the top-level - project is relevant, i.e. the default goals of child modules are ignored. Since Maven 3, - multiple goals/phases can be separated by whitespace. - - - - - 3.0.0+ - - This element describes all of the classpath resources such as properties - files associated with a project. These resources are often included in the final - package. - The default value is <code>src/main/resources</code>. - - - - - - - - - - - 4.0.0+ - - This element describes all of the classpath resources such as properties - files associated with a project's unit tests. - The default value is <code>src/test/resources</code>. - - - - - - - - - - - 4.0.0+ - - The directory where all files generated by the build are placed. - The default value is <code>target</code>. - - - - - - 4.0.0+ - - - The filename (excluding the extension, and with no path information) that - the produced artifact will be called. - The default value is <code>${artifactId}-${version}</code>. - - - - - - - 4.0.0+ - The list of filter properties files that are used when filtering is enabled. 
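Spelling the build layout defaults documented above out as explicit elements; every value here simply restates a default quoted in the text, including the finalName property form, so adding this block verbatim changes nothing.

    <build>
      <sourceDirectory>src/main/java</sourceDirectory>
      <scriptSourceDirectory>src/main/scripts</scriptSourceDirectory>
      <testSourceDirectory>src/test/java</testSourceDirectory>
      <outputDirectory>target/classes</outputDirectory>
      <testOutputDirectory>target/test-classes</testOutputDirectory>
      <directory>target</directory>
      <finalName>${artifactId}-${version}</finalName>
    </build>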
- - - - - - - - - - 4.0.0+ - Default plugin information to be made available for reference by projects - derived from this one. This plugin configuration will not be resolved or bound to the - lifecycle unless referenced. Any local configuration for a given plugin will override - the plugin's entire definition here. - - - - - 4.0.0+ - The list of plugins to use. - - - - - - - - - - - - 4.0.0+ - Describes a build extension to utilise. - - - - - 4.0.0+ - The group ID of the extension's artifact. - - - - - 4.0.0+ - The artifact ID of the extension. - - - - - 4.0.0+ - The version of the extension. - - - - - - - 3.0.0+ - Describes the licenses for this project. This is used to generate the license - page of the project's web site, as well as being taken into consideration in other reporting - and validation. The licenses listed for the project are that of the project itself, and not - of dependencies. - - - - - 3.0.0+ - The full legal name of the license. - - - - - 3.0.0+ - The official url for the license text. - - - - - 3.0.0+ - - - The primary method by which this project may be distributed. - <dl> - <dt>repo</dt> - <dd>may be downloaded from the Maven repository</dd> - <dt>manual</dt> - <dd>user must manually download and install the dependency.</dd> - </dl> - - - - - - - 3.0.0+ - Addendum information pertaining to this license. - - - - - - - 3.0.0+ - This element describes all of the mailing lists associated with a project. The - auto-generated site references this information. - - - - - 3.0.0+ - - - The name of the mailing list. - - - - - - - 3.0.0+ - - - The email address or link that can be used to subscribe to - the mailing list. If this is an email address, a - <code>mailto:</code> link will automatically be created - when the documentation is created. - - - - - - - 3.0.0+ - - - The email address or link that can be used to unsubscribe to - the mailing list. If this is an email address, a - <code>mailto:</code> link will automatically be created - when the documentation is created. - - - - - - - 3.0.0+ - - - The email address or link that can be used to post to - the mailing list. If this is an email address, a - <code>mailto:</code> link will automatically be created - when the documentation is created. - - - - - - - 3.0.0+ - The link to a URL where you can browse the mailing list archive. - - - - - 3.0.0+ - The link to alternate URLs where you can browse the list archive. - - - - - - - - - - - - 3.0.0+ - Information about one of the committers on this project. - - - - - 3.0.0+ - The unique ID of the developer in the SCM. - - - - - 3.0.0+ - The full name of the contributor. - - - - - 3.0.0+ - The email address of the contributor. - - - - - 3.0.0+ - The URL for the homepage of the contributor. - - - - - 3.0.0+ - The organization to which the contributor belongs. - - - - - 3.0.0+ - The URL of the organization. - - - - - 3.0.0+ - - - The roles the contributor plays in the project. Each role is described by a - <code>role</code> element, the body of which is a role name. This can also be used to - describe the contribution. - - - - - - - - - - - - 3.0.0+ - - - The timezone the contributor is in. Typically, this is a number in the range - <a href="http://en.wikipedia.org/wiki/UTC%E2%88%9212:00">-12</a> to <a href="http://en.wikipedia.org/wiki/UTC%2B14:00">+14</a> - or a valid time zone id like "America/Montreal" (UTC-05:00) or "Europe/Paris" (UTC+01:00). - - - - - - - 3.0.0+ - Properties about the contributor, such as an instant messenger handle. 
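Finally, a sketch of the license fields above as they would typically appear for an ASF project such as this one; repo is the documented distribution value meaning the artifact may be downloaded from the Maven repository, and the optional comments element is omitted.

    <licenses>
      <license>
        <name>Apache License, Version 2.0</name>
        <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
        <distribution>repo</distribution>
      </license>
    </licenses>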
- - - - - - - - - - \ No newline at end of file diff --git a/_tools/scalastyle.xml b/_tools/scalastyle.xml deleted file mode 100644 index f7bb0d4819c..00000000000 --- a/_tools/scalastyle.xml +++ /dev/null @@ -1,146 +0,0 @@ - - - - - - - - - - - - Scalastyle standard configuration - - - - - - - - - - - - - - - - - - - true - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/_tools/site/css/maven-base.css b/_tools/site/css/maven-base.css deleted file mode 100644 index 53153e9fe1a..00000000000 --- a/_tools/site/css/maven-base.css +++ /dev/null @@ -1,155 +0,0 @@ -body { - margin: 0px; - padding: 0px; -} -img { - border:none; -} -table { - padding:0px; - width: 100%; - margin-left: -2px; - margin-right: -2px; -} -acronym { - cursor: help; - border-bottom: 1px dotted #feb; -} -table.bodyTable th, table.bodyTable td { - padding: 2px 4px 2px 4px; - vertical-align: top; -} -div.clear{ - clear:both; - visibility: hidden; -} -div.clear hr{ - display: none; -} -#bannerLeft, #bannerRight { - font-size: xx-large; - font-weight: bold; -} -#bannerLeft img, #bannerRight img { - margin: 0px; -} -.xleft, #bannerLeft img { - float:left; -} -.xright, #bannerRight { - float:right; -} -#banner { - padding: 0px; -} -#banner img { - border: none; -} -#breadcrumbs { - padding: 3px 10px 3px 10px; -} -#leftColumn { - width: 170px; - float:left; - overflow: auto; -} -#bodyColumn { - margin-right: 1.5em; - margin-left: 197px; -} -#legend { - padding: 8px 0 8px 0; -} -#navcolumn { - padding: 8px 4px 0 8px; -} -#navcolumn h5 { - margin: 0; - padding: 0; - font-size: small; -} -#navcolumn ul { - margin: 0; - padding: 0; - font-size: small; -} -#navcolumn li { - list-style-type: none; - background-image: none; - background-repeat: no-repeat; - background-position: 0 0.4em; - padding-left: 16px; - list-style-position: outside; - line-height: 1.2em; - font-size: smaller; -} -#navcolumn li.expanded { - background-image: url(../images/expanded.gif); -} -#navcolumn li.collapsed { - background-image: url(../images/collapsed.gif); -} -#navcolumn li.none { - text-indent: -1em; - margin-left: 1em; -} -#poweredBy { - text-align: center; -} -#navcolumn img { - margin-top: 10px; - margin-bottom: 3px; -} -#poweredBy img { - display:block; - margin: 20px 0 20px 17px; -} -#search img { - margin: 0px; - display: block; -} -#search #q, #search #btnG { - border: 1px solid #999; - margin-bottom:10px; -} -#search form { - margin: 0px; -} -#lastPublished { - font-size: x-small; -} -.navSection { - margin-bottom: 2px; - padding: 8px; -} -.navSectionHead { - font-weight: bold; - font-size: x-small; -} -.section { - padding: 4px; -} -#footer { - padding: 3px 10px 3px 10px; - font-size: x-small; -} -#breadcrumbs { - font-size: x-small; - margin: 0pt; -} -.source { - padding: 12px; - margin: 1em 7px 1em 7px; -} -.source pre { - margin: 0px; - padding: 0px; -} -#navcolumn img.imageLink, .imageLink { - padding-left: 0px; - padding-bottom: 0px; - padding-top: 0px; - padding-right: 2px; - border: 0px; - margin: 0px; -} diff --git a/_tools/site/css/maven-theme.css b/_tools/site/css/maven-theme.css deleted file mode 100644 index c982168bf24..00000000000 --- a/_tools/site/css/maven-theme.css +++ /dev/null @@ -1,141 +0,0 @@ -body { - padding: 0px 0px 10px 0px; -} -body, td, select, input, li{ - font-family: Verdana, Helvetica, Arial, sans-serif; - font-size: 13px; -} -code{ - font-family: Courier, monospace; - font-size: 13px; -} 
-a { - text-decoration: none; -} -a:link { - color:#36a; -} -a:visited { - color:#47a; -} -a:active, a:hover { - color:#69c; -} -#legend li.externalLink { - background: url(../images/external.png) left top no-repeat; - padding-left: 18px; -} -a.externalLink, a.externalLink:link, a.externalLink:visited, a.externalLink:active, a.externalLink:hover { - background: url(../images/external.png) right center no-repeat; - padding-right: 18px; -} -#legend li.newWindow { - background: url(../images/newwindow.png) left top no-repeat; - padding-left: 18px; -} -a.newWindow, a.newWindow:link, a.newWindow:visited, a.newWindow:active, a.newWindow:hover { - background: url(../images/newwindow.png) right center no-repeat; - padding-right: 18px; -} -h2 { - padding: 4px 4px 4px 6px; - border: 1px solid #999; - color: #900; - background-color: #ddd; - font-weight:900; - font-size: x-large; -} -h3 { - padding: 4px 4px 4px 6px; - border: 1px solid #aaa; - color: #900; - background-color: #eee; - font-weight: normal; - font-size: large; -} -h4 { - padding: 4px 4px 4px 6px; - border: 1px solid #bbb; - color: #900; - background-color: #fff; - font-weight: normal; - font-size: large; -} -h5 { - padding: 4px 4px 4px 6px; - color: #900; - font-size: normal; -} -p { - line-height: 1.3em; - font-size: small; -} -#breadcrumbs { - border-top: 1px solid #aaa; - border-bottom: 1px solid #aaa; - background-color: #ccc; -} -#leftColumn { - margin: 10px 0 0 5px; - border: 1px solid #999; - background-color: #eee; -} -#navcolumn h5 { - font-size: smaller; - border-bottom: 1px solid #aaaaaa; - padding-top: 2px; - color: #000; -} - -table.bodyTable th { - color: white; - background-color: #bbb; - text-align: left; - font-weight: bold; -} - -table.bodyTable th, table.bodyTable td { - font-size: 1em; -} - -table.bodyTable tr.a { - background-color: #ddd; -} - -table.bodyTable tr.b { - background-color: #eee; -} - -.source { - border: 1px solid #999; -} -dl { - padding: 4px 4px 4px 6px; - border: 1px solid #aaa; - background-color: #ffc; -} -dt { - color: #900; -} -#organizationLogo img, #projectLogo img, #projectLogo span{ - margin: 8px; -} -#banner { - border-bottom: 1px solid #fff; -} -.errormark, .warningmark, .donemark, .infomark { - background: url(../images/icon_error_sml.gif) no-repeat; -} - -.warningmark { - background-image: url(../images/icon_warning_sml.gif); -} - -.donemark { - background-image: url(../images/icon_success_sml.gif); -} - -.infomark { - background-image: url(../images/icon_info_sml.gif); -} - diff --git a/_tools/site/css/print.css b/_tools/site/css/print.css deleted file mode 100644 index f09d546c225..00000000000 --- a/_tools/site/css/print.css +++ /dev/null @@ -1,7 +0,0 @@ -#banner, #footer, #leftcol, #breadcrumbs, .docs #toc, .docs .courtesylinks, #leftColumn, #navColumn { - display: none !important; -} -#bodyColumn, body.docs div.docs { - margin: 0 !important; - border: none !important -} diff --git a/_tools/site/css/site.css b/_tools/site/css/site.css deleted file mode 100644 index 055e7e286ad..00000000000 --- a/_tools/site/css/site.css +++ /dev/null @@ -1 +0,0 @@ -/* You can override this file with your own styles */ \ No newline at end of file diff --git a/_tools/site/images/close.gif b/_tools/site/images/close.gif deleted file mode 100644 index 1c26bbc5264..00000000000 Binary files a/_tools/site/images/close.gif and /dev/null differ diff --git a/_tools/site/images/collapsed.gif b/_tools/site/images/collapsed.gif deleted file mode 100644 index 6e710840640..00000000000 Binary files 
a/_tools/site/images/collapsed.gif and /dev/null differ diff --git a/_tools/site/images/expanded.gif b/_tools/site/images/expanded.gif deleted file mode 100644 index 0fef3d89e0d..00000000000 Binary files a/_tools/site/images/expanded.gif and /dev/null differ diff --git a/_tools/site/images/external.png b/_tools/site/images/external.png deleted file mode 100644 index 3f999fc88b3..00000000000 Binary files a/_tools/site/images/external.png and /dev/null differ diff --git a/_tools/site/images/icon_error_sml.gif b/_tools/site/images/icon_error_sml.gif deleted file mode 100644 index 61132ef2b01..00000000000 Binary files a/_tools/site/images/icon_error_sml.gif and /dev/null differ diff --git a/_tools/site/images/icon_info_sml.gif b/_tools/site/images/icon_info_sml.gif deleted file mode 100644 index c6cb9ad7ce4..00000000000 Binary files a/_tools/site/images/icon_info_sml.gif and /dev/null differ diff --git a/_tools/site/images/icon_success_sml.gif b/_tools/site/images/icon_success_sml.gif deleted file mode 100644 index 52e85a430af..00000000000 Binary files a/_tools/site/images/icon_success_sml.gif and /dev/null differ diff --git a/_tools/site/images/icon_warning_sml.gif b/_tools/site/images/icon_warning_sml.gif deleted file mode 100644 index 873bbb52cb9..00000000000 Binary files a/_tools/site/images/icon_warning_sml.gif and /dev/null differ diff --git a/_tools/site/images/logos/build-by-maven-black.png b/_tools/site/images/logos/build-by-maven-black.png deleted file mode 100644 index 919fd0f66a7..00000000000 Binary files a/_tools/site/images/logos/build-by-maven-black.png and /dev/null differ diff --git a/_tools/site/images/logos/build-by-maven-white.png b/_tools/site/images/logos/build-by-maven-white.png deleted file mode 100644 index 7d44c9c2e57..00000000000 Binary files a/_tools/site/images/logos/build-by-maven-white.png and /dev/null differ diff --git a/_tools/site/images/logos/maven-feather.png b/_tools/site/images/logos/maven-feather.png deleted file mode 100644 index b5ada836e9e..00000000000 Binary files a/_tools/site/images/logos/maven-feather.png and /dev/null differ diff --git a/_tools/site/images/newwindow.png b/_tools/site/images/newwindow.png deleted file mode 100644 index 6287f72bd08..00000000000 Binary files a/_tools/site/images/newwindow.png and /dev/null differ diff --git a/_tools/site/images/rss.png b/_tools/site/images/rss.png deleted file mode 100644 index f0796ac8862..00000000000 Binary files a/_tools/site/images/rss.png and /dev/null differ diff --git a/alluxio/pom.xml b/alluxio/pom.xml index f9d756d3df1..87d374db3e3 100644 --- a/alluxio/pom.xml +++ b/alluxio/pom.xml @@ -23,7 +23,7 @@ zeppelin-interpreter-parent org.apache.zeppelin - 0.10.0-SNAPSHOT + 0.12.0-SNAPSHOT ../zeppelin-interpreter-parent/pom.xml @@ -32,72 +32,61 @@ Zeppelin: Alluxio interpreter - 1.0.0 + 2.9.0 alluxio - com.google.guava - guava - 15.0 - - - - org.alluxio - alluxio-shell - ${alluxio.version} + org.alluxio + alluxio-shell + ${alluxio.version} - org.mockito - mockito-all - test - - - - org.powermock - powermock-api-mockito - test - - - - org.powermock - powermock-core + com.google.guava + guava + 31.0.1-jre test - - org.powermock - powermock-module-junit4 + com.google.protobuf + protobuf-java + 3.16.1 test - org.powermock - powermock-reflect + org.mockito + mockito-core test org.alluxio - alluxio-core-server + alluxio-minicluster ${alluxio.version} test + + + org.apache.hadoop + hadoop-client + + - org.alluxio - alluxio-minicluster - ${alluxio.version} + org.apache.hadoop + hadoop-client-api + 
${hadoop.version} test - org.alluxio - alluxio-underfs-local - ${alluxio.version} + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} test @@ -107,9 +96,6 @@ maven-enforcer-plugin - - maven-dependency-plugin - maven-resources-plugin @@ -119,9 +105,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - - false - diff --git a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java index be912ecab5e..5478926ec9d 100644 --- a/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java +++ b/alluxio/src/main/java/org/apache/zeppelin/alluxio/AlluxioInterpreter.java @@ -18,6 +18,10 @@ package org.apache.zeppelin.alluxio; +import alluxio.cli.fs.FileSystemShell; +import alluxio.conf.Configuration; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,9 +33,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Properties; +import java.util.stream.Stream; -import alluxio.Configuration; -import alluxio.shell.AlluxioShell; import org.apache.zeppelin.completer.CompletionType; import org.apache.zeppelin.interpreter.Interpreter; @@ -44,13 +47,13 @@ * Alluxio interpreter for Zeppelin. */ public class AlluxioInterpreter extends Interpreter { - - Logger logger = LoggerFactory.getLogger(AlluxioInterpreter.class); + + private static final Logger LOGGER = LoggerFactory.getLogger(AlluxioInterpreter.class); protected static final String ALLUXIO_MASTER_HOSTNAME = "alluxio.master.hostname"; protected static final String ALLUXIO_MASTER_PORT = "alluxio.master.port"; - private AlluxioShell fs; + private FileSystemShell fs; private int totalCommands = 0; private int completedCommands = 0; @@ -73,24 +76,33 @@ public AlluxioInterpreter(Properties property) { alluxioMasterPort = property.getProperty(ALLUXIO_MASTER_PORT); } + private Stream filteredProperties(String prefix) { + return properties.stringPropertyNames().stream().filter( + propertyKey -> propertyKey.startsWith(prefix) + ); + } + @Override public void open() { - logger.info("Starting Alluxio shell to connect to " + alluxioMasterHostname + + LOGGER.info("Starting Alluxio shell to connect to " + alluxioMasterHostname + " on port " + alluxioMasterPort); + // Setting the extra parameters being set in the interpreter config starting with alluxio + filteredProperties("alluxio.").forEach(x -> System.setProperty(x, properties.getProperty(x))); + + System.setProperty(PropertyKey.USER_RPC_RETRY_MAX_DURATION.getName(), "5s"); - System.setProperty(ALLUXIO_MASTER_HOSTNAME, alluxioMasterHostname); - System.setProperty(ALLUXIO_MASTER_PORT, alluxioMasterPort); - fs = new AlluxioShell(new Configuration()); + AlluxioConfiguration conf = Configuration.global(); + // Reduce the RPC retry max duration to fall earlier for CLIs + fs = new FileSystemShell(conf); } @Override public void close() { - logger.info("Closing Alluxio shell"); - + LOGGER.info("Closing Alluxio shell"); try { fs.close(); } catch (IOException e) { - logger.error("Cannot close connection", e); + LOGGER.error("Cannot close connection", e); } } diff --git a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java index 06711de3265..00405113a28 100644 --- a/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java +++ 
b/alluxio/src/test/java/org/apache/zeppelin/alluxio/AlluxioInterpreterTest.java @@ -18,14 +18,13 @@ package org.apache.zeppelin.alluxio; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import alluxio.conf.Configuration; +import alluxio.grpc.WritePType; +import alluxio.client.file.FileSystemTestUtils; +import alluxio.master.LocalAlluxioCluster; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -33,17 +32,9 @@ import java.util.Properties; import alluxio.AlluxioURI; -import alluxio.Constants; -import alluxio.client.FileSystemTestUtils; -import alluxio.client.WriteType; -import alluxio.client.file.FileInStream; import alluxio.client.file.FileSystem; import alluxio.client.file.URIStatus; import alluxio.exception.AlluxioException; -import alluxio.exception.ExceptionMessage; -import alluxio.master.LocalAlluxioCluster; -import alluxio.shell.command.CommandUtils; -import alluxio.util.FormatUtils; import alluxio.util.io.BufferUtils; import alluxio.util.io.PathUtils; @@ -51,37 +42,49 @@ import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static alluxio.cli.fs.command.CountCommand.COUNT_FORMAT; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; public class AlluxioInterpreterTest { private AlluxioInterpreter alluxioInterpreter; - private static final int SIZE_BYTES = Constants.MB * 10; private LocalAlluxioCluster mLocalAlluxioCluster = null; private FileSystem fs = null; - @After - public final void after() throws Exception { + @AfterEach + final void after() throws Exception { if (alluxioInterpreter != null) { alluxioInterpreter.close(); } + mLocalAlluxioCluster.stop(); } - @Before - public final void before() throws Exception { - mLocalAlluxioCluster = new LocalAlluxioCluster(SIZE_BYTES, 1000); + @BeforeEach + final void before() throws Exception { + mLocalAlluxioCluster = new LocalAlluxioCluster(1, false); + mLocalAlluxioCluster.initConfiguration("alluxio-test"); + Configuration.global().validate(); mLocalAlluxioCluster.start(); + fs = mLocalAlluxioCluster.getClient(); final Properties props = new Properties(); - props.put(AlluxioInterpreter.ALLUXIO_MASTER_HOSTNAME, mLocalAlluxioCluster.getMasterHostname()); - props.put(AlluxioInterpreter.ALLUXIO_MASTER_PORT, mLocalAlluxioCluster.getMasterPort() + ""); + props.put(AlluxioInterpreter.ALLUXIO_MASTER_HOSTNAME, mLocalAlluxioCluster.getHostname()); + props.put(AlluxioInterpreter.ALLUXIO_MASTER_PORT, mLocalAlluxioCluster.getMasterRpcPort() + ""); alluxioInterpreter = new AlluxioInterpreter(props); alluxioInterpreter.open(); } @Test - public void testCompletion() { - List expectedResultOne = Arrays.asList( + void testCompletion() { + List expectedResultOne = Arrays.asList( new InterpreterCompletion("cat", "cat", CompletionType.command.name()), new InterpreterCompletion("chgrp", "chgrp", CompletionType.command.name()), new InterpreterCompletion("chmod", "chmod", CompletionType.command.name()), @@ -90,18 +93,18 @@ public 
void testCompletion() { new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name()), new InterpreterCompletion("count", "count", CompletionType.command.name()), new InterpreterCompletion("createLineage", "createLineage", CompletionType.command.name())); - List expectedResultTwo = Arrays.asList( + List expectedResultTwo = Arrays.asList( new InterpreterCompletion("copyFromLocal", "copyFromLocal", CompletionType.command.name()), new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name()), new InterpreterCompletion("count", "count", CompletionType.command.name())); - List expectedResultThree = Arrays.asList( + List expectedResultThree = Arrays.asList( new InterpreterCompletion("copyFromLocal", "copyFromLocal", CompletionType.command.name()), new InterpreterCompletion("copyToLocal", "copyToLocal", CompletionType.command.name())); - List expectedResultNone = new ArrayList<>(); + List expectedResultNone = new ArrayList<>(); List resultOne = alluxioInterpreter.completion("c", 0, null); List resultTwo = alluxioInterpreter.completion("co", 0, null); @@ -109,255 +112,92 @@ public void testCompletion() { List resultNotMatch = alluxioInterpreter.completion("notMatch", 0, null); List resultAll = alluxioInterpreter.completion("", 0, null); - Assert.assertEquals(expectedResultOne, resultOne); - Assert.assertEquals(expectedResultTwo, resultTwo); - Assert.assertEquals(expectedResultThree, resultThree); - Assert.assertEquals(expectedResultNone, resultNotMatch); + assertEquals(expectedResultOne, resultOne); + assertEquals(expectedResultTwo, resultTwo); + assertEquals(expectedResultThree, resultThree); + assertEquals(expectedResultNone, resultNotMatch); - List allCompletionList = new ArrayList<>(); + List allCompletionList = new ArrayList<>(); for (InterpreterCompletion ic : resultAll) { allCompletionList.add(ic.getName()); } - Assert.assertEquals(alluxioInterpreter.keywords, allCompletionList); - } - - @Test - public void catDirectoryTest() throws IOException { - String expected = "Successfully created directory /testDir\n\n" + - "Path /testDir must be a file\n"; - - InterpreterResult output = alluxioInterpreter.interpret("mkdir /testDir" + - "\ncat /testDir", null); - - Assert.assertEquals(Code.ERROR, output.code()); - Assert.assertEquals(expected, output.message().get(0).getData()); + assertEquals(alluxioInterpreter.keywords, allCompletionList); } @Test - public void catNotExistTest() throws IOException { - InterpreterResult output = alluxioInterpreter.interpret("cat /testFile", null); - Assert.assertEquals(Code.ERROR, output.code()); - } - - @Test - public void catTest() throws IOException { - FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.MUST_CACHE, - 10, 10); + void catTest() throws IOException { + FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.MUST_CACHE, 10, 10); InterpreterResult output = alluxioInterpreter.interpret("cat /testFile", null); byte[] expected = BufferUtils.getIncreasingByteArray(10); - Assert.assertEquals(Code.SUCCESS, output.code()); - Assert.assertArrayEquals(expected, + assertEquals(Code.SUCCESS, output.code()); + assertArrayEquals(expected, output.message().get(0).getData().substring(0, output.message().get(0).getData().length() - 1).getBytes()); } @Test - public void copyFromLocalLargeTest() throws IOException, AlluxioException { - File testFile = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testFile"); - testFile.createNewFile(); - FileOutputStream fos = new FileOutputStream(testFile); - 
byte[] toWrite = BufferUtils.getIncreasingByteArray(SIZE_BYTES); - fos.write(toWrite); - fos.close(); - - InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " + - testFile.getAbsolutePath() + " /testFile", null); - Assert.assertEquals( - "Copied " + testFile.getAbsolutePath() + " to /testFile\n\n", - output.message().get(0).getData()); - - long fileLength = fs.getStatus(new AlluxioURI("/testFile")).getLength(); - Assert.assertEquals(SIZE_BYTES, fileLength); - - FileInStream fStream = fs.openFile(new AlluxioURI("/testFile")); - byte[] read = new byte[SIZE_BYTES]; - fStream.read(read); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(SIZE_BYTES, read)); - } - - @Test - public void loadFileTest() throws IOException, AlluxioException { - FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.CACHE_THROUGH, 10, 10); + void loadFileTest() throws IOException, AlluxioException { + FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.CACHE_THROUGH, 10, 10); int memPercentage = fs.getStatus(new AlluxioURI("/testFile")).getInMemoryPercentage(); - Assert.assertFalse(memPercentage == 0); + assertNotEquals(0, memPercentage); alluxioInterpreter.interpret("load /testFile", null); memPercentage = fs.getStatus(new AlluxioURI("/testFile")).getInMemoryPercentage(); - Assert.assertTrue(memPercentage == 100); + assertEquals(100, memPercentage); } @Test - public void loadDirTest() throws IOException, AlluxioException { - FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA", WriteType.CACHE_THROUGH, 10, 10); - FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileB", WriteType.MUST_CACHE, 10, 10); - - int memPercentageA = fs.getStatus( - new AlluxioURI("/testRoot/testFileA")).getInMemoryPercentage(); - int memPercentageB = fs.getStatus( - new AlluxioURI("/testRoot/testFileB")).getInMemoryPercentage(); - Assert.assertFalse(memPercentageA == 0); - Assert.assertTrue(memPercentageB == 100); - - alluxioInterpreter.interpret("load /testRoot", null); - - memPercentageA = fs.getStatus(new AlluxioURI("/testRoot/testFileA")).getInMemoryPercentage(); - memPercentageB = fs.getStatus(new AlluxioURI("/testRoot/testFileB")).getInMemoryPercentage(); - Assert.assertTrue(memPercentageA == 100); - Assert.assertTrue(memPercentageB == 100); - } - - @Test - public void copyFromLocalTest() throws IOException, AlluxioException { - File testDir = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testDir"); - testDir.mkdir(); - File testDirInner = new File(mLocalAlluxioCluster.getAlluxioHome() + "/testDir/testDirInner"); - testDirInner.mkdir(); - File testFile = - generateFileContent("/testDir/testFile", BufferUtils.getIncreasingByteArray(10)); - - generateFileContent("/testDir/testDirInner/testFile2", - BufferUtils.getIncreasingByteArray(10, 20)); - - InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " + - testFile.getParent() + " /testDir", null); - Assert.assertEquals( - "Copied " + testFile.getParent() + " to /testDir\n\n", - output.message().get(0).getData()); - - long fileLength1 = fs.getStatus(new AlluxioURI("/testDir/testFile")).getLength(); - long fileLength2 = fs.getStatus(new AlluxioURI("/testDir/testDirInner/testFile2")).getLength(); - Assert.assertEquals(10, fileLength1); - Assert.assertEquals(20, fileLength2); - - FileInStream fStream1 = fs.openFile(new AlluxioURI("/testDir/testFile")); - FileInStream fStream2 = fs.openFile(new AlluxioURI("/testDir/testDirInner/testFile2")); - byte[] read = new byte[10]; - fStream1.read(read); - 
Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, read)); - read = new byte[20]; - fStream2.read(read); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, 20, read)); - } - - @Test - public void copyFromLocalTestWithFullURI() throws IOException, AlluxioException { - File testFile = generateFileContent("/srcFileURI", BufferUtils.getIncreasingByteArray(10)); - String uri = "tachyon://" + mLocalAlluxioCluster.getMasterHostname() + ":" - + mLocalAlluxioCluster.getMasterPort() + "/destFileURI"; - - InterpreterResult output = alluxioInterpreter.interpret("copyFromLocal " + - testFile.getPath() + " " + uri, null); - Assert.assertEquals( - "Copied " + testFile.getPath() + " to " + uri + "\n\n", - output.message().get(0).getData()); - - long fileLength = fs.getStatus(new AlluxioURI("/destFileURI")).getLength(); - Assert.assertEquals(10L, fileLength); - - FileInStream fStream = fs.openFile(new AlluxioURI("/destFileURI")); - byte[] read = new byte[10]; - fStream.read(read); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(10, read)); - } - - @Test - public void copyFromLocalFileToDstPathTest() throws IOException, AlluxioException { - String dataString = "copyFromLocalFileToDstPathTest"; - byte[] data = dataString.getBytes(); - File localDir = new File(mLocalAlluxioCluster.getAlluxioHome() + "/localDir"); - localDir.mkdir(); - File localFile = generateFileContent("/localDir/testFile", data); - - alluxioInterpreter.interpret("mkdir /dstDir", null); - alluxioInterpreter.interpret("copyFromLocal " + localFile.getPath() + " /dstDir", null); - - FileInStream fStream = fs.openFile(new AlluxioURI("/dstDir/testFile")); - long fileLength = fs.getStatus(new AlluxioURI("/dstDir/testFile")).getLength(); - - byte[] read = new byte[(int) fileLength]; - fStream.read(read); - Assert.assertEquals(new String(read), dataString); - } - - @Test - public void copyToLocalLargeTest() throws IOException { - copyToLocalWithBytes(SIZE_BYTES); - } - - @Test - public void copyToLocalTest() throws IOException { - copyToLocalWithBytes(10); - } - - private void copyToLocalWithBytes(int bytes) throws IOException { - FileSystemTestUtils.createByteFile(fs, "/testFile", WriteType.MUST_CACHE, 10, 10); + void copyToLocalTest() throws IOException { + FileSystemTestUtils.createByteFile(fs, "/testFile", WritePType.MUST_CACHE, 10, 10); InterpreterResult output = alluxioInterpreter.interpret("copyToLocal /testFile " + mLocalAlluxioCluster.getAlluxioHome() + "/testFile", null); - Assert.assertEquals( - "Copied /testFile to " + mLocalAlluxioCluster.getAlluxioHome() + "/testFile\n\n", + assertEquals( + "Copied /testFile to file://" + mLocalAlluxioCluster.getAlluxioHome() + "/testFile\n\n", output.message().get(0).getData()); fileReadTest("/testFile", 10); } @Test - public void countNotExistTest() throws IOException { - InterpreterResult output = alluxioInterpreter.interpret("count /NotExistFile", null); - Assert.assertEquals(Code.ERROR, output.code()); - Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message().get(0).getData()); - } - - @Test - public void countTest() throws IOException { + void countTest() throws IOException { FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA", - WriteType.CACHE_THROUGH, 10, 10); + WritePType.MUST_CACHE, 10); FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB", - WriteType.CACHE_THROUGH, 20, 20); + WritePType.MUST_CACHE, 20); FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileB", - 
WriteType.CACHE_THROUGH, 30, 30); + WritePType.MUST_CACHE, 30); InterpreterResult output = alluxioInterpreter.interpret("count /testRoot", null); String expected = ""; - String format = "%-25s%-25s%-15s\n"; - expected += String.format(format, "File Count", "Folder Count", "Total Bytes"); - expected += String.format(format, 3, 2, 60); + expected += String.format(COUNT_FORMAT, "File Count", "Folder Count", "Folder Size"); + expected += String.format(COUNT_FORMAT, 3, 1, 60); expected += "\n"; - Assert.assertEquals(expected, output.message().get(0).getData()); - } + assertEquals(expected, output.message().get(0).getData()); - @Test - public void fileinfoNotExistTest() throws IOException { - InterpreterResult output = alluxioInterpreter.interpret("fileInfo /NotExistFile", null); - Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message().get(0).getData()); - Assert.assertEquals(Code.ERROR, output.code()); + InterpreterResult output2 = alluxioInterpreter.interpret("count -h /testRoot", null); + String expected2 = ""; + expected2 += String.format(COUNT_FORMAT, "File Count", "Folder Count", "Folder Size"); + expected2 += String.format(COUNT_FORMAT, 3, 1, "60B"); + expected2 += "\n"; + assertEquals(expected2, output2.message().get(0).getData()); } @Test - public void locationNotExistTest() throws IOException { - InterpreterResult output = alluxioInterpreter.interpret("location /NotExistFile", null); - Assert.assertEquals(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage("/NotExistFile") + "\n", - output.message().get(0).getData()); - Assert.assertEquals(Code.ERROR, output.code()); - } - - @Test - public void lsTest() throws IOException, AlluxioException { + void lsTest() throws IOException, AlluxioException { URIStatus[] files = new URIStatus[3]; FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA", - WriteType.MUST_CACHE, 10, 10); + WritePType.MUST_CACHE, 10, 10); FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB", - WriteType.MUST_CACHE, 20, 20); + WritePType.MUST_CACHE, 20, 20); FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileC", - WriteType.THROUGH, 30, 30); + WritePType.THROUGH, 30, 30); files[0] = fs.getStatus(new AlluxioURI("/testRoot/testFileA")); files[1] = fs.getStatus(new AlluxioURI("/testRoot/testDir")); @@ -365,120 +205,20 @@ public void lsTest() throws IOException, AlluxioException { InterpreterResult output = alluxioInterpreter.interpret("ls /testRoot", null); - String expected = ""; - String format = "%-10s%-25s%-15s%-5s\n"; - expected += String.format(format, FormatUtils.getSizeFromBytes(10), - CommandUtils.convertMsToDate(files[0].getCreationTimeMs()), "In Memory", - "/testRoot/testFileA"); - expected += String.format(format, FormatUtils.getSizeFromBytes(0), - CommandUtils.convertMsToDate(files[1].getCreationTimeMs()), "", "/testRoot/testDir"); - expected += String.format(format, FormatUtils.getSizeFromBytes(30), - CommandUtils.convertMsToDate(files[2].getCreationTimeMs()), "Not In Memory", - "/testRoot/testFileC"); - expected += "\n"; - - Assert.assertEquals(Code.SUCCESS, output.code()); - Assert.assertEquals(expected, output.message().get(0).getData()); - } - - @Test - public void lsRecursiveTest() throws IOException, AlluxioException { - URIStatus[] files = new URIStatus[4]; - - FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileA", - WriteType.MUST_CACHE, 10, 10); - FileSystemTestUtils.createByteFile(fs, "/testRoot/testDir/testFileB", - WriteType.MUST_CACHE, 20, 20); - 
FileSystemTestUtils.createByteFile(fs, "/testRoot/testFileC", - WriteType.THROUGH, 30, 30); - - files[0] = fs.getStatus(new AlluxioURI("/testRoot/testFileA")); - files[1] = fs.getStatus(new AlluxioURI("/testRoot/testDir")); - files[2] = fs.getStatus(new AlluxioURI("/testRoot/testDir/testFileB")); - files[3] = fs.getStatus(new AlluxioURI("/testRoot/testFileC")); - - InterpreterResult output = alluxioInterpreter.interpret("ls -R /testRoot", null); - - String expected = ""; - String format = "%-10s%-25s%-15s%-5s\n"; - expected += - String.format(format, FormatUtils.getSizeFromBytes(10), - CommandUtils.convertMsToDate(files[0].getCreationTimeMs()), "In Memory", - "/testRoot/testFileA"); - expected += - String.format(format, FormatUtils.getSizeFromBytes(0), - CommandUtils.convertMsToDate(files[1].getCreationTimeMs()), "", - "/testRoot/testDir"); - expected += - String.format(format, FormatUtils.getSizeFromBytes(20), - CommandUtils.convertMsToDate(files[2].getCreationTimeMs()), "In Memory", - "/testRoot/testDir/testFileB"); - expected += - String.format(format, FormatUtils.getSizeFromBytes(30), - CommandUtils.convertMsToDate(files[3].getCreationTimeMs()), "Not In Memory", - "/testRoot/testFileC"); - expected += "\n"; - - Assert.assertEquals(expected, output.message().get(0).getData()); - } - - @Test - public void mkdirComplexPathTest() throws IOException, AlluxioException { - InterpreterResult output = alluxioInterpreter.interpret( - "mkdir /Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File", null); - - boolean existsDir = fs.exists(new AlluxioURI("/Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File")); - Assert.assertEquals( - "Successfully created directory /Complex!@#$%^&*()-_=+[]{};\"'<>,.?/File\n\n", - output.message().get(0).getData()); - Assert.assertTrue(existsDir); - } - - @Test - public void mkdirExistingTest() throws IOException { - String command = "mkdir /festFile1"; - Assert.assertEquals(Code.SUCCESS, alluxioInterpreter.interpret(command, null).code()); - Assert.assertEquals(Code.ERROR, alluxioInterpreter.interpret(command, null).code()); - } - - @Test - public void mkdirInvalidPathTest() throws IOException { - Assert.assertEquals( - Code.ERROR, - alluxioInterpreter.interpret("mkdir /test File Invalid Path", null).code()); + assertEquals(Code.SUCCESS, output.code()); } @Test - public void mkdirShortPathTest() throws IOException, AlluxioException { - InterpreterResult output = alluxioInterpreter.interpret("mkdir /root/testFile1", null); - boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1")); - Assert.assertEquals( - "Successfully created directory /root/testFile1\n\n", - output.message().get(0).getData()); - Assert.assertTrue(existsDir); - } - - @Test - public void mkdirTest() throws IOException, AlluxioException { + void mkdirTest() throws IOException, AlluxioException { String qualifiedPath = - "tachyon://" + mLocalAlluxioCluster.getMasterHostname() + ":" - + mLocalAlluxioCluster.getMasterPort() + "/root/testFile1"; + "alluxio://" + mLocalAlluxioCluster.getHostname() + ":" + + mLocalAlluxioCluster.getMasterRpcPort() + "/root/testFile1"; InterpreterResult output = alluxioInterpreter.interpret("mkdir " + qualifiedPath, null); boolean existsDir = fs.exists(new AlluxioURI("/root/testFile1")); - Assert.assertEquals( + assertEquals( "Successfully created directory " + qualifiedPath + "\n\n", output.message().get(0).getData()); - Assert.assertTrue(existsDir); - } - - private File generateFileContent(String path, byte[] toWrite) - throws IOException { - File testFile = new 
File(mLocalAlluxioCluster.getAlluxioHome() + path); - testFile.createNewFile(); - FileOutputStream fos = new FileOutputStream(testFile); - fos.write(toWrite); - fos.close(); - return testFile; + assertTrue(existsDir); } private void fileReadTest(String fileName, int size) throws IOException { @@ -487,6 +227,6 @@ private void fileReadTest(String fileName, int size) throws IOException { byte[] read = new byte[size]; fis.read(read); fis.close(); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(size, read)); + assertTrue(BufferUtils.equalIncreasingByteArray(size, read)); } } diff --git a/angular/pom.xml b/angular/pom.xml index 609814a9bce..aa3989e8bfc 100644 --- a/angular/pom.xml +++ b/angular/pom.xml @@ -23,7 +23,7 @@ zeppelin-interpreter-parent org.apache.zeppelin - 0.10.0-SNAPSHOT + 0.12.0-SNAPSHOT ../zeppelin-interpreter-parent/pom.xml @@ -40,9 +40,6 @@ maven-enforcer-plugin - - maven-dependency-plugin - maven-resources-plugin @@ -52,9 +49,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - - false - diff --git a/beam/README.md b/beam/README.md deleted file mode 100644 index 948c95cfc0f..00000000000 --- a/beam/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Overview -Beam interpreter for Apache Zeppelin - -# Architecture -Current interpreter implementation supports the static repl. It compiles the code in memory, execute it and redirect the output to zeppelin. - -## Building the Beam Interpreter -You have to first build the Beam interpreter by enable the **beam** profile as follows: - -``` -mvn clean package -Pbeam -DskipTests -Pscala-2.10 -``` - -### Notice -- Flink runner comes with binary compiled for scala 2.10. So, currently we support only Scala 2.10 - -### Technical overview - - * Upon starting an interpreter, an instance of `JavaCompiler` is created. - - * When the user runs commands with beam, the `JavaParser` go through the code to get a class that contains the main method. - - * Then it replaces the class name with random class name to avoid overriding while compilation. it creates new out & err stream to get the data in new stream instead of the console, to redirect output to zeppelin. - - * If there is any error during compilation, it can catch and redirect to zeppelin. 
diff --git a/beam/pom.xml b/beam/pom.xml deleted file mode 100644 index 199d503d1db..00000000000 --- a/beam/pom.xml +++ /dev/null @@ -1,272 +0,0 @@ - - - - - 4.0.0 - - - zeppelin-interpreter-parent - org.apache.zeppelin - 0.10.0-SNAPSHOT - ../zeppelin-interpreter-parent/pom.xml - - - zeppelin-beam - jar - Zeppelin: Beam interpreter - - - beam - - 2.3.0 - 1.6.2 - 2.0.0 - 2.10 - - - 4.1.42.Final - 3.1.0 - 1.8.1 - - - - - io.netty - netty-all - ${netty.version} - - - - org.apache.spark - spark-core_${beam.scala.binary.version} - ${beam.spark.version} - - - slf4j-log4j12 - org.slf4j - - - netty-all - io.netty - - - akka-actor_${beam.scala.binary.version} - org.spark-project.akka - - - akka-remote_${beam.scala.binary.version} - org.spark-project.akka - - - akka-slf4j_${beam.scala.binary.version} - org.spark-project.akka - - - - - - org.apache.spark - spark-streaming_${beam.scala.binary.version} - ${beam.spark.version} - - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${beam.hadoop.version} - - - slf4j-log4j12 - org.slf4j - - - - - - org.apache.hadoop - hadoop-common - ${beam.hadoop.version} - - - slf4j-log4j12 - org.slf4j - - - - - - org.apache.zeppelin - zeppelin-java - ${project.version} - - - - org.apache.zeppelin - zeppelin-scio - ${project.version} - - - - org.apache.hadoop - hadoop-hdfs - ${beam.hadoop.version} - - - - org.apache.hadoop - hadoop-client - ${beam.hadoop.version} - - - slf4j-log4j12 - org.slf4j - - - - - - org.apache.hadoop - hadoop-annotations - ${beam.hadoop.version} - - - - org.apache.hadoop - hadoop-yarn-common - ${beam.hadoop.version} - - - - org.apache.hadoop - hadoop-mapreduce-client-common - ${beam.hadoop.version} - - - slf4j-log4j12 - org.slf4j - - - - - - com.thoughtworks.qdox - qdox - 2.0-M3 - - - - org.apache.beam - beam-runners-parent - ${beam.beam.version} - pom - - - - org.apache.beam - beam-runners-core-java - ${beam.beam.version} - - - google-http-client-jackson2 - com.google.http-client - - - - - - org.apache.beam - beam-runners-direct-java - ${beam.beam.version} - - - - javax.servlet - javax.servlet-api - ${servlet.api.version} - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - ${beam.beam.version} - - - google-http-client-jackson2 - com.google.http-client - - - - - - org.apache.beam - beam-runners-spark - ${beam.beam.version} - jar - - - - org.apache.beam - beam-runners-flink_${beam.scala.binary.version} - ${beam.beam.version} - - - - ${project.groupId} - zeppelin-interpreter-shaded - ${project.version} - - - - org.apache.commons - commons-exec - ${commons.exec.version} - - - - org.apache.avro - avro - ${avro.version} - - - - - - - - maven-enforcer-plugin - - - maven-dependency-plugin - - - maven-resources-plugin - - - maven-shade-plugin - - - org.apache.maven.plugins - maven-checkstyle-plugin - - false - - - - - diff --git a/beam/src/main/resources/interpreter-setting.json b/beam/src/main/resources/interpreter-setting.json deleted file mode 100644 index e9b4a73c2ad..00000000000 --- a/beam/src/main/resources/interpreter-setting.json +++ /dev/null @@ -1,37 +0,0 @@ -[ - { - "group": "beam", - "name": "beam", - "className": "org.apache.zeppelin.beam.BeamInterpreter", - "defaultInterpreter": true, - "properties": { - }, - "editor": { - "editOnDblClick": false - } - }, - { - "group": "beam", - "name": "scio", - "className": "org.apache.zeppelin.scio.ScioInterpreter", - "properties": { - "zeppelin.scio.argz": { - "envName": "ZEPPELIN_SCIO_ARGZ", - "propertyName": "zeppelin.scio.argz", - "defaultValue": 
"--runner=InProcessPipelineRunner", - "description": "Scio interpreter wide arguments", - "type": "textarea" - }, - "zeppelin.scio.maxResult": { - "envName": "ZEPPELIN_SCIO_MAXRESULT", - "propertyName": "zeppelin.scio.maxResult", - "defaultValue": "1000", - "description": "Max number of SCollection results to display.", - "type": "number" - } - }, - "editor": { - "language": "scala" - } - } -] diff --git a/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java b/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java deleted file mode 100644 index d1c56ee06ec..00000000000 --- a/beam/src/test/org/apache/zeppelin/beam/BeamInterpreterTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.beam; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterResult; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.Properties; - -import static org.junit.Assert.assertEquals; - -/** - * BeamInterpreterTest - */ -public class BeamInterpreterTest { - - private static BeamInterpreter beam; - private static InterpreterContext context; - - @BeforeClass - public static void setUp() { - Properties p = new Properties(); - beam = new BeamInterpreter(p); - beam.open(); - context = InterpreterContext.builder().build(); - } - - @AfterClass - public static void tearDown() { - beam.close(); - } - - @Test - public void testStaticRepl() { - - StringWriter writer = new StringWriter(); - PrintWriter out = new PrintWriter(writer); - out.println("public class HelloWorld {"); - out.println(" public static void main(String args[]) {"); - out.println(" System.out.println(\"This is in another java file\");"); - out.println(" }"); - out.println("}"); - out.close(); - - InterpreterResult res = beam.interpret(writer.toString(), context); - - assertEquals(InterpreterResult.Code.SUCCESS, res.code()); - } - - @Test - public void testStaticReplWithoutMain() { - - StringBuffer sourceCode = new StringBuffer(); - sourceCode.append("package org.mdkt;\n"); - sourceCode.append("public class HelloClass {\n"); - sourceCode.append(" public String hello() { return \"hello\"; }"); - sourceCode.append("}"); - InterpreterResult res = beam.interpret(sourceCode.toString(), context); - assertEquals(InterpreterResult.Code.ERROR, res.code()); - } - - @Test - public void testStaticReplWithSyntaxError() { - - StringWriter writer = new StringWriter(); - PrintWriter out = new PrintWriter(writer); - out.println("public class HelloWorld {"); - out.println(" public static void main(String args[]) {"); - out.println(" System.out.prin(\"This is in another java 
file\");"); - out.println(" }"); - out.println("}"); - out.close(); - InterpreterResult res = beam.interpret(writer.toString(), context); - - assertEquals(InterpreterResult.Code.ERROR, res.code()); - } - -} diff --git a/bigquery/README.md b/bigquery/README.md index 0dff5feb7c8..024d81167da 100644 --- a/bigquery/README.md +++ b/bigquery/README.md @@ -8,7 +8,7 @@ If you like to run these tests manually, please follow the following steps: * [Create a new project](https://support.google.com/cloud/answer/6251787?hl=en) * [Create a Google Compute Engine instance](https://cloud.google.com/compute/docs/instances/create-start-instance) * Copy the project ID that you created and add it to the property "projectId" in `resources/constants.json` -* Run the command mvn -Dbigquery.text.exclude='' test -pl bigquery -am +* Run the command ./mvnw -Dbigquery.text.exclude='' test -pl bigquery -am # Connection The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables. diff --git a/bigquery/pom.xml b/bigquery/pom.xml index b689d926e5d..a101374d877 100644 --- a/bigquery/pom.xml +++ b/bigquery/pom.xml @@ -23,7 +23,7 @@ zeppelin-interpreter-parent org.apache.zeppelin - 0.10.0-SNAPSHOT + 0.12.0-SNAPSHOT ../zeppelin-interpreter-parent/pom.xml @@ -39,7 +39,6 @@ v2-rev20190917-1.30.3 - 2.8.6 24.1.1-jre bigquery @@ -77,6 +76,10 @@ guava ${guava.version} + + org.apache.commons + commons-lang3 + @@ -85,9 +88,6 @@ maven-enforcer-plugin - - maven-dependency-plugin - maven-resources-plugin @@ -121,9 +121,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - - false - diff --git a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java index 0973fda0df2..c23bd228e71 100644 --- a/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java +++ b/bigquery/src/main/java/org/apache/zeppelin/bigquery/BigQueryInterpreter.java @@ -37,6 +37,7 @@ import com.google.api.services.bigquery.model.TableRow; import com.google.common.base.Function; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,7 +80,7 @@ * */ public class BigQueryInterpreter extends Interpreter { - private static Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class); + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryInterpreter.class); private static final char NEWLINE = '\n'; private static final char TAB = '\t'; private static Bigquery service = null; @@ -90,6 +91,7 @@ public class BigQueryInterpreter extends Interpreter { static final String WAIT_TIME = "zeppelin.bigquery.wait_time"; static final String MAX_ROWS = "zeppelin.bigquery.max_no_of_rows"; static final String SQL_DIALECT = "zeppelin.bigquery.sql_dialect"; + static final String REGION = "zeppelin.bigquery.region"; private static String jobId = null; private static String projectId = null; @@ -117,9 +119,9 @@ public void open() { try { service = createAuthorizedClient(); exceptionOnConnect = null; - logger.info("Opened BigQuery SQL Connection"); + LOGGER.info("Opened BigQuery SQL Connection"); } catch (IOException e) { - logger.error("Cannot open connection", e); + LOGGER.error("Cannot open connection", e); exceptionOnConnect = e; close(); } @@ -227,6 +229,7 @@ private InterpreterResult executeSql(String sql) { long wTime = Long.parseLong(getProperty(WAIT_TIME)); long maxRows = Long.parseLong(getProperty(MAX_ROWS)); String 
sqlDialect = getProperty(SQL_DIALECT, "").toLowerCase(); + String region = getProperty(REGION, null); Boolean useLegacySql; switch (sqlDialect) { case "standardsql": @@ -241,9 +244,9 @@ private InterpreterResult executeSql(String sql) { } Iterator pages; try { - pages = run(sql, projId, wTime, maxRows, useLegacySql); + pages = run(sql, projId, wTime, maxRows, useLegacySql, region); } catch (IOException ex) { - logger.error(ex.getMessage()); + LOGGER.error(ex.getMessage()); return new InterpreterResult(Code.ERROR, ex.getMessage()); } try { @@ -258,10 +261,11 @@ private InterpreterResult executeSql(String sql) { //Function to run the SQL on bigQuery service public static Iterator run(final String queryString, - final String projId, final long wTime, final long maxRows, Boolean useLegacySql) - throws IOException { + final String projId, final long wTime, final long maxRows, + Boolean useLegacySql, final String region) + throws IOException { try { - logger.info("Use legacy sql: {}", useLegacySql); + LOGGER.info("Use legacy sql: {}", useLegacySql); QueryResponse query; query = service .jobs() @@ -275,6 +279,9 @@ public static Iterator run(final String queryString, GetQueryResults getRequest = service.jobs().getQueryResults( projectId, jobId); + if (StringUtils.isNotBlank(region)) { + getRequest = getRequest.setLocation(region); + } return getPages(getRequest); } catch (IOException ex) { throw ex; @@ -283,14 +290,14 @@ public static Iterator run(final String queryString, @Override public void close() { - logger.info("Close bqsql connection!"); + LOGGER.info("Close bqsql connection!"); service = null; } @Override public InterpreterResult interpret(String sql, InterpreterContext contextInterpreter) { - logger.info("Run SQL command '{}'", sql); + LOGGER.info("Run SQL command '{}'", sql); return executeSql(sql); } @@ -312,19 +319,19 @@ public int getProgress(InterpreterContext context) { @Override public void cancel(InterpreterContext context) { - logger.info("Trying to Cancel current query statement."); + LOGGER.info("Trying to Cancel current query statement."); if (service != null && jobId != null && projectId != null) { try { Bigquery.Jobs.Cancel request = service.jobs().cancel(projectId, jobId); JobCancelResponse response = request.execute(); jobId = null; - logger.info("Query Execution cancelled"); + LOGGER.info("Query Execution cancelled"); } catch (IOException ex) { - logger.error("Could not cancel the SQL execution"); + LOGGER.error("Could not cancel the SQL execution"); } } else { - logger.info("Query Execution was already cancelled"); + LOGGER.info("Query Execution was already cancelled"); } } diff --git a/bigquery/src/main/resources/interpreter-setting.json b/bigquery/src/main/resources/interpreter-setting.json index 8023bed1522..989cc375d96 100644 --- a/bigquery/src/main/resources/interpreter-setting.json +++ b/bigquery/src/main/resources/interpreter-setting.json @@ -31,6 +31,13 @@ "defaultValue": "", "description": "BigQuery SQL dialect (standardSQL or legacySQL). If empty, query prefix like '#standardSQL' can be used.", "type": "string" + }, + "zeppelin.bigquery.region": { + "envName": null, + "propertyName": "zeppelin.bigquery.region", + "defaultValue": "", + "description": "Location of BigQuery dataset. 
Needed if it is a single-region dataset.", + "type": "string" } }, "editor": { diff --git a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java index 9dcd9f8c61d..630530aa948 100644 --- a/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java +++ b/bigquery/src/test/java/org/apache/zeppelin/bigquery/BigQueryInterpreterTest.java @@ -16,16 +16,9 @@ package org.apache.zeppelin.bigquery; -import static org.junit.Assert.assertEquals; - import com.google.gson.Gson; -import com.google.gson.JsonIOException; -import com.google.gson.JsonSyntaxException; - -import org.junit.Before; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Properties; @@ -33,8 +26,11 @@ import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterGroup; import org.apache.zeppelin.interpreter.InterpreterResult; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; -public class BigQueryInterpreterTest { +class BigQueryInterpreterTest { protected static class Constants { private String projectId; private String oneQuery; @@ -55,12 +51,10 @@ public String getWrong() { protected static Constants constants = null; - public BigQueryInterpreterTest() - throws JsonSyntaxException, JsonIOException, FileNotFoundException { - if (constants == null) { - InputStream is = this.getClass().getResourceAsStream("/constants.json"); - constants = (new Gson()).fromJson(new InputStreamReader(is), Constants.class); - } + @BeforeAll + public static void initConstants() { + InputStream is = ClassLoader.class.getResourceAsStream("/constants.json"); + constants = (new Gson()). 
fromJson(new InputStreamReader(is), Constants.class); } private InterpreterGroup intpGroup; @@ -68,7 +62,7 @@ public BigQueryInterpreterTest() private InterpreterContext context; - @Before + @BeforeEach public void setUp() throws Exception { Properties p = new Properties(); p.setProperty("zeppelin.bigquery.project_id", constants.getProjectId()); @@ -84,27 +78,27 @@ public void setUp() throws Exception { } @Test - public void sqlSuccess() { + void sqlSuccess() { InterpreterResult ret = bqInterpreter.interpret(constants.getOne(), context); assertEquals(InterpreterResult.Code.SUCCESS, ret.code()); - assertEquals(ret.message().get(0).getType(), InterpreterResult.Type.TABLE); + assertEquals(InterpreterResult.Type.TABLE, ret.message().get(0).getType()); } @Test - public void badSqlSyntaxFails() { + void badSqlSyntaxFails() { InterpreterResult ret = bqInterpreter.interpret(constants.getWrong(), context); assertEquals(InterpreterResult.Code.ERROR, ret.code()); } @Test - public void testWithQueryPrefix() { + void testWithQueryPrefix() { InterpreterResult ret = bqInterpreter.interpret( "#standardSQL\n WITH t AS (select 1) SELECT * FROM t", context); assertEquals(InterpreterResult.Code.SUCCESS, ret.code()); } @Test - public void testInterpreterOutputData() { + void testInterpreterOutputData() { InterpreterResult ret = bqInterpreter.interpret("SELECT 1 AS col1, 2 AS col2", context); String[] lines = ret.message().get(0).getData().split("\\n"); assertEquals(2, lines.length); diff --git a/bigquery/src/main/resources/constants.json b/bigquery/src/test/resources/constants.json similarity index 100% rename from bigquery/src/main/resources/constants.json rename to bigquery/src/test/resources/constants.json diff --git a/submarine/src/test/resources/log4j.properties b/bigquery/src/test/resources/log4j.properties similarity index 68% rename from submarine/src/test/resources/log4j.properties rename to bigquery/src/test/resources/log4j.properties index 9c22fdc83bd..b724845905d 100644 --- a/submarine/src/test/resources/log4j.properties +++ b/bigquery/src/test/resources/log4j.properties @@ -27,21 +27,4 @@ log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c:%L - %m%n # Root logger option log4j.rootLogger=INFO, stdout - -#mute some noisy guys -log4j.logger.org.apache.hadoop.mapred=WARN -log4j.logger.org.apache.hadoop.hive.ql=WARN -log4j.logger.org.apache.hadoop.hive.metastore=WARN -log4j.logger.org.apache.haadoop.hive.service.HiveServer=WARN - -log4j.logger.org.quartz=WARN -log4j.logger.DataNucleus=WARN -log4j.logger.DataNucleus.MetaData=ERROR -log4j.logger.DataNucleus.Datastore=ERROR - -# Log all JDBC parameters -log4j.logger.org.hibernate.type=ALL -log4j.logger.org.apache.hadoop=WARN - -log4j.logger.org.apache.zeppelin.interpreter=DEBUG -log4j.logger.org.apache.zeppelin.scheduler=DEBUG +#log4j.logger.org.apache.zeppelin.interpreter=DEBUG diff --git a/bin/common.sh b/bin/common.sh index fb533d902a2..56f8aa45ff3 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -172,6 +172,10 @@ fi export ZEPPELIN_RUNNER if [[ -z "$ZEPPELIN_IDENT_STRING" ]]; then + # if for some reason the shell doesn't have $USER defined + # (e.g., ssh'd in to execute a command) + # let's get the effective username and use that + USER=${USER:-$(id -nu)} export ZEPPELIN_IDENT_STRING="${USER}" fi diff --git a/bin/interpreter.sh b/bin/interpreter.sh index c75a2990eb5..aaa9b0a15a0 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -101,6 +101,9 @@ fi . 
"${bin}/common.sh" +# Escape envs +ZEPPELIN_INTP_CLASSPATH_OVERRIDES=$(printf %q "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}") + check_java_version ZEPPELIN_INTERPRETER_API_JAR=$(find "${ZEPPELIN_HOME}/interpreter" -name 'zeppelin-interpreter-shaded-*.jar') @@ -179,29 +182,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH" export PYTHONPATH="${py4j[0]}:$PYTHONPATH" else - # add Hadoop jars into classpath - if [[ -n "${HADOOP_HOME}" ]]; then - # Apache - addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share" - - # CDH - addJarInDirForIntp "${HADOOP_HOME}" - addJarInDirForIntp "${HADOOP_HOME}/lib" - fi - - addJarInDirForIntp "${INTERPRETER_DIR}/dep" - - py4j=("${ZEPPELIN_HOME}"/interpreter/spark/pyspark/py4j-*-src.zip) - # pick the first match py4j zip - there should only be one - PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}" - - if [[ -z "${PYTHONPATH}" ]]; then - export PYTHONPATH="${PYSPARKPATH}" - else - export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}" - fi - unset PYSPARKPATH - export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" + echo "No SPARK_HOME is specified" + exit -1 fi if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then @@ -226,28 +208,7 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then else echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded" fi -elif [[ "${INTERPRETER_ID}" == "pig" ]]; then - # autodetect HADOOP_CONF_HOME by heuristic - if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then - if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then - export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" - elif [[ -d "/etc/hadoop/conf" ]]; then - export HADOOP_CONF_DIR="/etc/hadoop/conf" - fi - fi - - if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" - fi - # autodetect TEZ_CONF_DIR - if [[ -n "${TEZ_CONF_DIR}" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}" - elif [[ -d "/etc/tez/conf" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf" - else - echo "TEZ_CONF_DIR is not set, configuration might not be loaded" - fi elif [[ "${INTERPRETER_ID}" == "flink" ]]; then addEachJarInDirRecursiveForIntp "${FLINK_HOME}/lib" @@ -300,13 +261,13 @@ if [[ -n "${SPARK_SUBMIT}" ]]; then IFS=' ' read -r -a SPARK_SUBMIT_OPTIONS_ARRAY <<< "${SPARK_SUBMIT_OPTIONS}" IFS='|' read -r -a ZEPPELIN_SPARK_CONF_ARRAY <<< "${ZEPPELIN_SPARK_CONF}" if [[ "${ZEPPELIN_SPARK_YARN_CLUSTER}" == "true" ]]; then - INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-java-options" "${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") + INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-java-options" "${SPARK_DRIVER_EXTRAJAVAOPTIONS_CONF} ${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") else - INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" "${ZEPPELIN_SERVER}" "--driver-class-path" "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}" "--driver-java-options" "${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") + INTERPRETER_RUN_COMMAND+=("${SPARK_SUBMIT}" "--class" 
"${ZEPPELIN_SERVER}" "--driver-class-path" "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}" "--driver-java-options" "${SPARK_DRIVER_EXTRAJAVAOPTIONS_CONF} ${JAVA_INTP_OPTS}" "${SPARK_SUBMIT_OPTIONS_ARRAY[@]}" "${ZEPPELIN_SPARK_CONF_ARRAY[@]}" "${SPARK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") fi -elif [[ "${ZEPPELIN_FLINK_YARN_APPLICATION}" == "true" ]]; then - IFS='|' read -r -a ZEPPELIN_FLINK_YARN_APPLICATION_CONF_ARRAY <<< "${ZEPPELIN_FLINK_YARN_APPLICATION_CONF}" - INTERPRETER_RUN_COMMAND+=("${FLINK_HOME}/bin/flink" "run-application" "-c" "${ZEPPELIN_SERVER}" "-t" "yarn-application" "${ZEPPELIN_FLINK_YARN_APPLICATION_CONF_ARRAY[@]}" "${FLINK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") +elif [[ -n "${ZEPPELIN_FLINK_APPLICATION_MODE}" ]]; then + IFS='|' read -r -a ZEPPELIN_FLINK_APPLICATION_MODE_CONF_ARRAY <<< "${ZEPPELIN_FLINK_APPLICATION_MODE_CONF}" + INTERPRETER_RUN_COMMAND+=("${FLINK_HOME}/bin/flink" "run-application" "-c" "${ZEPPELIN_SERVER}" "-t" "${ZEPPELIN_FLINK_APPLICATION_MODE}" "${ZEPPELIN_FLINK_APPLICATION_MODE_CONF_ARRAY[@]}" "${FLINK_APP_JAR}" "${CALLBACK_HOST}" "${PORT}" "${INTP_GROUP_ID}" "${INTP_PORT}") else IFS=' ' read -r -a JAVA_INTP_OPTS_ARRAY <<< "${JAVA_INTP_OPTS}" IFS=' ' read -r -a ZEPPELIN_INTP_MEM_ARRAY <<< "${ZEPPELIN_INTP_MEM}" diff --git a/bin/upgrade-note.sh b/bin/upgrade-note.sh deleted file mode 100755 index e68ef784038..00000000000 --- a/bin/upgrade-note.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Convert note format from 0.9.0 before to 0.9.0 after -# - -USAGE="Usage: bin/upgrade-note.sh [-d]" - -bin=$(dirname "${BASH_SOURCE-$0}") -bin=$(cd "${bin}">/dev/null; pwd) - -. 
"${bin}/common.sh" - -JAVA_OPTS="-Dzeppelin.log.file=logs/upgrade-note.log" -MAIN_CLASS=org.apache.zeppelin.notebook.repo.UpgradeNoteFileTool - -# construct classpath -if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then - ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" -fi - -if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/classes" ]]; then - ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/classes" -fi - -if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then - ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes" -fi - -addJarInDir "${ZEPPELIN_HOME}" -addJarInDir "${ZEPPELIN_HOME}/lib" -addJarInDir "${ZEPPELIN_HOME}/lib/interpreter" -addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib" -addJarInDir "${ZEPPELIN_HOME}/zeppelin-zengine/target/lib" -addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib" - -ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH" - -## Add hadoop jars when env USE_HADOOP is true -if [[ "${USE_HADOOP}" != "false" ]]; then - if [[ -z "${HADOOP_CONF_DIR}" ]]; then - echo "Please specify HADOOP_CONF_DIR if USE_HADOOP is true" - else - ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}" - if ! [ -x "$(command -v hadoop)" ]; then - echo 'hadoop command is not in PATH when HADOOP_CONF_DIR is specified.' - else - ZEPPELIN_CLASSPATH+=":`hadoop classpath`" - fi - fi -fi - -exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:${ZEPPELIN_CLASSPATH} $MAIN_CLASS "$@" diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index f3c31ff9168..b3090f19398 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -145,14 +145,20 @@ function wait_zeppelin_is_up_for_ci() { if [[ "${CI}" == "true" ]]; then local count=0; while [[ "${count}" -lt 30 ]]; do + # check with angular webapp path curl -v localhost:8080 2>&1 | grep '200 OK' - if [[ $? -ne 0 ]]; then - sleep 1 - continue - else + if [[ $? -eq 0 ]]; then break fi - let "count+=1" + + # check with classic webapp path + curl -v localhost:8080/classic/ 2>&1 | grep '200 OK' + if [[ $? -eq 0 ]]; then + break + fi + + sleep 1 + let "count+=1" done fi } diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index ef85eaad03a..efd3aae8b8a 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -30,7 +30,7 @@ if [ -f /proc/self/cgroup ] && [ -n "$(command -v getent)" ]; then set +e uidentry="$(getent passwd "$myuid")" set -e - + # If there is no passwd entry for the container UID, attempt to create one if [ -z "$uidentry" ] ; then if [ -w /etc/passwd ] ; then @@ -115,8 +115,18 @@ addJarInDir "${ZEPPELIN_HOME}/zeppelin-web-angular/target/lib" ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH" -if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then - ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}" +## Add hadoop jars when env USE_HADOOP is true +if [[ "${USE_HADOOP}" != "false" ]]; then + if [[ -z "${HADOOP_CONF_DIR}" ]]; then + echo "Please specify HADOOP_CONF_DIR if USE_HADOOP is true" + else + ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}" + if ! [ -x "$(command -v hadoop)" ]; then + echo 'hadoop command is not in PATH when HADOOP_CONF_DIR is specified.' + else + ZEPPELIN_CLASSPATH+=":`hadoop classpath`" + fi + fi fi if [[ ! 
-d "${ZEPPELIN_LOG_DIR}" ]]; then diff --git a/build-tools/pom.xml b/build-tools/pom.xml new file mode 100644 index 00000000000..5559f2ff347 --- /dev/null +++ b/build-tools/pom.xml @@ -0,0 +1,27 @@ + + + 4.0.0 + Zeppelin: Tools + Zeppelin Tools + build-tools + + org.apache.zeppelin + zeppelin + 0.12.0-SNAPSHOT + + \ No newline at end of file diff --git a/_tools/checkstyle.xml b/build-tools/src/main/resources/zeppelin/checkstyle.xml similarity index 100% rename from _tools/checkstyle.xml rename to build-tools/src/main/resources/zeppelin/checkstyle.xml diff --git a/cassandra/pom.xml b/cassandra/pom.xml index d4ec018382c..7b9ea6cc7ed 100644 --- a/cassandra/pom.xml +++ b/cassandra/pom.xml @@ -21,7 +21,7 @@ zeppelin-interpreter-parent org.apache.zeppelin - 0.10.0-SNAPSHOT + 0.12.0-SNAPSHOT ../zeppelin-interpreter-parent/pom.xml @@ -31,15 +31,19 @@ Zeppelin cassandra support - 4.8.0 - 1.1.7.3 - 1.6.0 - 1.7.1 + 4.14.1 + 1.1.10.4 + 1.8.0 + 1.9.8 - 4.2.0 + 5.12.1 4.3.1.0 + ${scala.2.12.version} + 2.12 + 1.1.2 + cassandra @@ -100,13 +104,6 @@ runtime - - org.apache.commons commons-lang3 @@ -118,6 +115,12 @@ ${scalate.version} + + org.scala-lang.modules + scala-parser-combinators_${scala.binary.version} + ${scala.parser.combinators} + + org.scalatest @@ -126,6 +129,13 @@ test + + org.scalacheck + scalacheck_${scala.binary.version} + ${scalacheck.version} + test + + net.java.dev.jna jna @@ -152,12 +162,6 @@ mockito-core test - - - org.assertj - assertj-core - test - @@ -174,30 +178,50 @@ - org.scala-tools - maven-scala-plugin + net.alchim31.maven + scala-maven-plugin - compile + eclipse-add-source - compile + add-source - compile - test-compile + scala-compile-first + process-resources - testCompile + compile - test-compile - process-resources + scala-test-compile-first + process-test-resources - compile + testCompile + + + -unchecked + -deprecation + -feature + -nobootcp + + + -Xms1024m + -Xmx1024m + -XX:MaxMetaspaceSize=${MaxMetaspace} + + + -source + ${java.version} + -target + ${java.version} + -Xlint:all,-serial,-path,-options + + @@ -213,30 +237,9 @@ - - org.scalatra.scalate - maven-scalate-plugin_${scala.binary.version} - - - compile - process-classes - - precompile - - - ${basedir}/src/main/resources/scalate - org.fusesource.scalate.DefaultRenderContext - - - - - maven-enforcer-plugin - - maven-dependency-plugin - maven-resources-plugin @@ -246,9 +249,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - - false - diff --git a/cassandra/src/main/resources/scalate/helpMenu.ssp b/cassandra/src/main/resources/scalate/helpMenu.ssp index 8ed2ae2564a..80fc99413a4 100644 --- a/cassandra/src/main/resources/scalate/helpMenu.ssp +++ b/cassandra/src/main/resources/scalate/helpMenu.ssp @@ -219,7 +219,7 @@ 3.x - + http://docs.datastax.com/en/cql/3.3/cql/cqlIntro.html @@ -227,7 +227,7 @@ 2.2 - + http://docs.datastax.com/en/cql/3.3/cql/cqlIntro.html @@ -235,7 +235,7 @@ 2.1 - + http://docs.datastax.com/en/cql/3.1/cql/cql_intro_c.html @@ -778,7 +778,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;null is parsed as-is
  • boolean (true|false) are parsed as-is
  • collection values must follow the - standard CQL syntax: + standard CQL syntax:
    • list: [‘list_item1’, ’list_item2’, ...]
    • set: {‘set_item1’, ‘set_item2’, …}
    • @@ -787,12 +787,12 @@ select id, double, float, text, date, time, timestamp from zep.test_format;
    • tuple values should be enclosed between parenthesis - (see tuple CQL syntax): + (see tuple CQL syntax): (‘text’, 123, true)
    • udt values should be enclosed between brackets - (see udt CQL syntax): + (see udt CQL syntax): {stree_name: ‘Beverly Hills’, number: 104, zip_code: 90020, state: ‘California’, …}
    @@ -834,7 +834,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;

    Instead of hard-coding your CQL queries, it is possible to use - Zeppelin dynamic form + Zeppelin dynamic form syntax to inject simple value or multiple choices forms. The legacy mustache syntax ( {{ }} ) to bind input text and select form is still supported but is deprecated and will be removed in future releases. @@ -1050,7 +1050,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;Asynchronous execution is only possible when it is possible to return a Future value in the InterpreterResult. It may be an interesting proposal for the Zeppelin project.

    Recently, Zeppelin allows you to choose the level of isolation for your interpreters (see - Interpreter Binding Mode ). + Interpreter Binding Mode ).

    Long story short, you have 3 available bindings: @@ -1137,7 +1137,7 @@ select id, double, float, text, date, time, timestamp from zep.test_format;

    - If you encounter a bug for this interpreter, please create a JIRA ticket. + If you encounter a bug for this interpreter, please create a JIRA ticket.
    diff --git a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala index 5529a71859c..c3007f436f8 100644 --- a/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala +++ b/cassandra/src/main/scala/org/apache/zeppelin/cassandra/InterpreterLogic.scala @@ -380,7 +380,7 @@ class InterpreterLogic(val session: CqlSession, val properties: Properties) { findInAngularRepository(variable) match { case Some(value) => statement.replaceAll(escapedExp,value.toString) case None => - val value = context.getGui.input(variable, defaultVal) + val value = context.getGui.textbox(variable, defaultVal) statement.replaceAll(escapedExp, value.toString) } diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java index 5e4c9946117..8a6cce4ee9e 100644 --- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java +++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/CassandraInterpreterTest.java @@ -19,6 +19,8 @@ import com.datastax.oss.driver.api.core.CqlSession; import com.datastax.oss.driver.api.core.config.DefaultDriverOption; import com.datastax.oss.driver.api.core.config.DriverExecutionProfile; + +import org.apache.commons.io.IOUtils; import org.apache.zeppelin.display.AngularObjectRegistry; import org.apache.zeppelin.display.GUI; import org.apache.zeppelin.interpreter.Interpreter; @@ -28,15 +30,13 @@ import org.cassandraunit.CQLDataLoader; import org.cassandraunit.dataset.cql.ClassPathCQLDataSet; import org.cassandraunit.utils.EmbeddedCassandraServerHelper; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Properties; @@ -61,18 +61,20 @@ import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SOCKET_READ_TIMEOUT_MILLIS; import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SOCKET_TCP_NO_DELAY; import static org.apache.zeppelin.cassandra.CassandraInterpreter.CASSANDRA_SPECULATIVE_EXECUTION_POLICY; -import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; -public class CassandraInterpreterTest { //extends AbstractCassandraUnit4CQLTestCase { +public class CassandraInterpreterTest { // extends AbstractCassandraUnit4CQLTestCase { private static final String ARTISTS_TABLE = "zeppelin.artists"; private static volatile CassandraInterpreter interpreter; private final InterpreterContext intrContext = InterpreterContext.builder() - .setParagraphTitle("Paragraph1") - .build(); + .setParagraphTitle("Paragraph1") + .build(); - @BeforeClass + @BeforeAll public static synchronized void setUp() throws IOException, InterruptedException { System.setProperty("cassandra.skip_wait_for_gossip_to_settle", "0"); System.setProperty("cassandra.load_ring_state", "false"); @@ -111,44 +113,43 @@ public static 
synchronized void setUp() throws IOException, InterruptedException properties.setProperty(CASSANDRA_HOSTS, EmbeddedCassandraServerHelper.getHost()); properties.setProperty(CASSANDRA_PORT, - Integer.toString(EmbeddedCassandraServerHelper.getNativeTransportPort())); + Integer.toString(EmbeddedCassandraServerHelper.getNativeTransportPort())); properties.setProperty("datastax-java-driver.advanced.connection.pool.local.size", "1"); interpreter = new CassandraInterpreter(properties); interpreter.open(); } - @AfterClass + @AfterAll public static void tearDown() { interpreter.close(); } @Test - public void should_create_cluster_and_session_upon_call_to_open(){ - assertThat(interpreter.session).isNotNull(); - assertThat(interpreter.helper).isNotNull(); + void should_create_cluster_and_session_upon_call_to_open() { + assertNotNull(interpreter.session); + assertNotNull(interpreter.helper); } @Test - public void should_set_custom_option() { - assertThat(interpreter.session).isNotNull(); + void should_set_custom_option() { + assertNotNull(interpreter.session); DriverExecutionProfile config = interpreter.session.getContext() - .getConfig().getDefaultProfile(); - assertThat(config.getInt(DefaultDriverOption.CONNECTION_POOL_LOCAL_SIZE, 10)) - .isEqualTo(1); + .getConfig().getDefaultProfile(); + assertEquals(1, config.getInt(DefaultDriverOption.CONNECTION_POOL_LOCAL_SIZE, 10)); } @Test - public void should_interpret_simple_select() { - //Given + void should_interpret_simple_select() { + // Given - //When + // When final InterpreterResult actual = interpreter.interpret("SELECT * FROM " + ARTISTS_TABLE + - " LIMIT 10;", intrContext); + " LIMIT 10;", intrContext); - //Then - assertThat(actual).isNotNull(); - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("name\tborn\tcountry\tdied\tgender\t" + + // Then + assertNotNull(actual); + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("name\tborn\tcountry\tdied\tgender\t" + "styles\ttype\n" + "Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t" + "[Dance, Electro]\tPerson\n" + @@ -163,60 +164,60 @@ public void should_interpret_simple_select() { "[Rock, Pop, Classic]\tPerson\n" + "Los Paranoias\tnull\tUnknown\tnull\tnull\t[Unknown]\tnull\n" + "…And You Will Know Us by the Trail of Dead\t1994-01-01\tUSA\tnull\tnull\t" + - "[Rock, Pop, Classic]\tGroup\n"); + "[Rock, Pop, Classic]\tGroup\n", actual.message().get(0).getData()); } @Test - public void should_interpret_select_statement() { - //Given + void should_interpret_select_statement() { + // Given - //When + // When final InterpreterResult actual = interpreter.interpret("SELECT * FROM " + ARTISTS_TABLE + - " LIMIT 2;", intrContext); + " LIMIT 2;", intrContext); - //Then - assertThat(actual).isNotNull(); - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()) - .isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + + // Then + assertNotNull(actual); + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + "Bogdan Raczynski\t1977-01-01\tPoland\tnull\tMale\t" + "[Dance, Electro]\tPerson\n" + - "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n"); + "Krishna Das\t1947-05-31\tUSA\tnull\tMale\t[Unknown]\tPerson\n", + actual.message().get(0).getData()); } @Test - public void should_interpret_select_statement_with_cql_format() { - //When + void should_interpret_select_statement_with_cql_format() { + // When 
intrContext.getLocalProperties().put("outputFormat", "cql"); final InterpreterResult actual = interpreter.interpret( - "SELECT * FROM " + ARTISTS_TABLE + " LIMIT 2;", intrContext); + "SELECT * FROM " + ARTISTS_TABLE + " LIMIT 2;", intrContext); intrContext.getLocalProperties().remove("outputFormat"); - //Then - assertThat(actual).isNotNull(); - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()) - .isEqualTo("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + - "'Bogdan Raczynski'\t'1977-01-01'\t'Poland'\tnull\t'Male'\t" + - "['Dance','Electro']\t'Person'\n" + - "'Krishna Das'\t'1947-05-31'\t'USA'\tnull\t'Male'\t['Unknown']\t'Person'\n"); + // Then + assertNotNull(actual); + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("name\tborn\tcountry\tdied\tgender\tstyles\ttype\n" + + "'Bogdan Raczynski'\t'1977-01-01'\t'Poland'\tnull\t'Male'\t" + + "['Dance','Electro']\t'Person'\n" + + "'Krishna Das'\t'1947-05-31'\t'USA'\tnull\t'Male'\t['Unknown']\t'Person'\n", + actual.message().get(0).getData()); } @Test - public void should_interpret_select_statement_with_formatting_options() { - //When + void should_interpret_select_statement_with_formatting_options() { + // When Map props = intrContext.getLocalProperties(); props.put("outputFormat", "human"); props.put("locale", "de_DE"); props.put("floatPrecision", "2"); props.put("doublePrecision", "4"); props.put("decimalPrecision", "5"); - props.put("timeFormat", "hh:mma"); + props.put("timeFormat", "hh:mm"); props.put("timestampFormat", "MM/dd/yy HH:mm"); - props.put("dateFormat", "E, d MMM yy"); + props.put("dateFormat", "EEEE, d MMMM yy"); props.put("timezone", "Etc/GMT+2"); String query = - "select date,time,timestamp,dec,double,float,tuple,udt from zeppelin.test_format;"; + "select date,time,timestamp,dec,double,float,tuple,udt from zeppelin.test_format;"; final InterpreterResult actual = interpreter.interpret(query, intrContext); props.remove("outputFormat"); props.remove("locale"); @@ -228,624 +229,614 @@ public void should_interpret_select_statement_with_formatting_options() { props.remove("dateFormat"); props.remove("timezone"); - //Then - assertThat(actual).isNotNull(); - assertThat(actual.code()).isEqualTo(Code.SUCCESS); + // Then + assertNotNull(actual); + assertEquals(Code.SUCCESS, actual.code()); String expected = "date\ttime\ttimestamp\tdec\tdouble\tfloat\ttuple\tudt\n" + - "Di, 29 Jan 19\t04:05AM\t06/16/20 21:59\t123562352352,12346\t10,0153\t20,03\t" + - "(1, text, 10)\t{id: 1, t: text, lst: [1, 2, 3]}\n"; - assertThat(actual.message().get(0).getData()).isEqualTo(expected); + "Dienstag, 29 Januar 19\t04:05\t06/16/20 21:59\t123562352352,12346\t10,0153\t20,03\t" + + "(1, text, 10)\t{id: 1, t: text, lst: [1, 2, 3]}\n"; + assertEquals(expected, actual.message().get(0).getData()); } @Test - public void should_interpret_multiple_statements_with_single_line_logged_batch() { - //Given + void should_interpret_multiple_statements_with_single_line_logged_batch() { + // Given String statements = "CREATE TABLE IF NOT EXISTS zeppelin.albums(\n" + - " title text PRIMARY KEY,\n" + - " artist text,\n" + - " year int\n" + - ");\n" + - "BEGIN BATCH" + - " INSERT INTO zeppelin.albums(title,artist,year) " + - "VALUES('The Impossible Dream EP','Carter the Unstoppable Sex Machine',1992);" + - " INSERT INTO zeppelin.albums(title,artist,year) " + - "VALUES('The Way You Are','Tears for Fears',1983);" + - " INSERT INTO zeppelin.albums(title,artist,year) " + - "VALUES('Primitive','Soulfly',2003);" + - "APPLY 
BATCH;\n" + - "SELECT * FROM zeppelin.albums;"; - //When + " title text PRIMARY KEY,\n" + + " artist text,\n" + + " year int\n" + + ");\n" + + "BEGIN BATCH" + + " INSERT INTO zeppelin.albums(title,artist,year) " + + "VALUES('The Impossible Dream EP','Carter the Unstoppable Sex Machine',1992);" + + " INSERT INTO zeppelin.albums(title,artist,year) " + + "VALUES('The Way You Are','Tears for Fears',1983);" + + " INSERT INTO zeppelin.albums(title,artist,year) " + + "VALUES('Primitive','Soulfly',2003);" + + "APPLY BATCH;\n" + + "SELECT * FROM zeppelin.albums;"; + // When final InterpreterResult actual = interpreter.interpret(statements, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("title\tartist\tyear\n" + - "The Impossible Dream EP\tCarter the Unstoppable Sex Machine\t1992\n" + - "The Way You Are\tTears for Fears\t1983\n" + - "Primitive\tSoulfly\t2003\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("title\tartist\tyear\n" + + "The Impossible Dream EP\tCarter the Unstoppable Sex Machine\t1992\n" + + "The Way You Are\tTears for Fears\t1983\n" + + "Primitive\tSoulfly\t2003\n", actual.message().get(0).getData()); } - + @Test - public void should_throw_statement_not_having_semi_colon() { - //Given + void should_throw_statement_not_having_semi_colon() { + // Given String statement = "SELECT * zeppelin.albums"; - //When + // When final InterpreterResult actual = interpreter.interpret(statement, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()) - .contains("Error parsing input:\n" + - "\t'SELECT * zeppelin.albums'\n" + - "Did you forget to add ; (semi-colon) at the end of each CQL statement ?"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertTrue(actual.message().get(0).getData().contains("Error parsing input:\n" + + "\t'SELECT * zeppelin.albums'\n" + + "Did you forget to add ; (semi-colon) at the end of each CQL statement ?"), + actual.message().get(0).getData()); } @Test - public void should_validate_statement() { - //Given + void should_validate_statement() { + // Given String statement = "SELECT * zeppelin.albums;"; - //When + // When final InterpreterResult actual = interpreter.interpret(statement, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); + // Then + assertEquals(Code.ERROR, actual.code()); String s = "line 1:9 mismatched input 'zeppelin' expecting K_FROM (SELECT * [zeppelin]...)"; - assertThat(actual.message().get(0).getData()) - .contains(s); + assertTrue(actual.message().get(0).getData().contains(s), actual.message().get(0).getData()); } @Test - public void should_execute_statement_with_consistency_option() { - //Given + void should_execute_statement_with_consistency_option() { + // Given String statement = "@consistency=THREE\n" + - "SELECT * FROM zeppelin.artists LIMIT 1;"; + "SELECT * FROM zeppelin.artists LIMIT 1;"; - //When + // When final InterpreterResult actual = interpreter.interpret(statement, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()) - .contains("Not enough replicas available for query at consistency THREE (3 required " + - "but only 1 alive)"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertTrue(actual.message().get(0).getData() + .contains("Not enough replicas available for query at consistency THREE (3 required " + + "but only 1 alive)"), + 
actual.message().get(0).getData()); } @Test - public void should_execute_statement_with_serial_consistency_option() { - //Given + void should_execute_statement_with_serial_consistency_option() { + // Given String statement = "@serialConsistency=SERIAL\n" + - "SELECT * FROM zeppelin.artists LIMIT 1;"; + "SELECT * FROM zeppelin.artists LIMIT 1;"; - //When + // When final InterpreterResult actual = interpreter.interpret(statement, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); + // Then + assertEquals(Code.SUCCESS, actual.code()); } @Test - public void should_execute_statement_with_timestamp_option() throws Exception { - //Given + void should_execute_statement_with_timestamp_option() throws Exception { + // Given String statement1 = "INSERT INTO zeppelin.ts(key,val) VALUES('k','v1');"; String statement2 = "@timestamp=15\n" + - "INSERT INTO zeppelin.ts(key,val) VALUES('k','v2');"; + "INSERT INTO zeppelin.ts(key,val) VALUES('k','v2');"; CqlSession session = EmbeddedCassandraServerHelper.getSession(); // Insert v1 with current timestamp interpreter.interpret(statement1, intrContext); System.out.println("going to read data from zeppelin.ts;"); session.execute("SELECT val FROM zeppelin.ts LIMIT 1") - .forEach(x -> System.out.println("row " + x )); + .forEach(x -> System.out.println("row " + x)); Thread.sleep(1); - //When + // When // Insert v2 with past timestamp interpreter.interpret(statement2, intrContext); System.out.println("going to read data from zeppelin.ts;"); session.execute("SELECT val FROM zeppelin.ts LIMIT 1") - .forEach(x -> System.out.println("row " + x )); + .forEach(x -> System.out.println("row " + x)); final String actual = session.execute("SELECT val FROM zeppelin.ts LIMIT 1").one() - .getString("val"); + .getString("val"); - //Then - assertThat(actual).isEqualTo("v1"); + // Then + assertEquals("v1", actual); } @Test - public void should_execute_statement_with_request_timeout() { - //Given + void should_execute_statement_with_request_timeout() { + // Given String statement = "@requestTimeOut=10000000\n" + - "SELECT * FROM zeppelin.artists;"; + "SELECT * FROM zeppelin.artists;"; - //When + // When final InterpreterResult actual = interpreter.interpret(statement, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); + // Then + assertEquals(Code.SUCCESS, actual.code()); } @Test - public void should_execute_prepared_and_bound_statements() { - //Given + void should_execute_prepared_and_bound_statements() { + // Given String queries = "@prepare[ps]=INSERT INTO zeppelin.prepared(key,val) VALUES(?,?)\n" + - "@prepare[select]=SELECT * FROM zeppelin.prepared WHERE key=:key\n" + - "@bind[ps]='myKey','myValue'\n" + - "@bind[select]='myKey'"; + "@prepare[select]=SELECT * FROM zeppelin.prepared WHERE key=:key\n" + + "@bind[ps]='myKey','myValue'\n" + + "@bind[select]='myKey'"; - //When + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("key\tval\n" + - "myKey\tmyValue\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("key\tval\nmyKey\tmyValue\n", actual.message().get(0).getData()); } @Test - public void should_execute_bound_statement() { - //Given + void should_execute_bound_statement() { + // Given String queries = "@prepare[users_insert]=INSERT INTO zeppelin.users" + - "(login,firstname,lastname,addresses,location)" + - "VALUES(:login,:fn,:ln,:addresses,:loc)\n" 
+ - "@bind[users_insert]='jdoe','John','DOE'," + - "{street_number: 3, street_name: 'Beverly Hills Bld', zip_code: 90209," + - " country: 'USA', extra_info: ['Right on the hills','Next to the post box']," + - " phone_numbers: {'home': 2016778524, 'office': 2015790847}}," + - "('USA', 90209, 'Beverly Hills')\n" + - "SELECT * FROM zeppelin.users WHERE login='jdoe';"; - //When + "(login,firstname,lastname,addresses,location)" + + "VALUES(:login,:fn,:ln,:addresses,:loc)\n" + + "@bind[users_insert]='jdoe','John','DOE'," + + "{street_number: 3, street_name: 'Beverly Hills Bld', zip_code: 90209," + + " country: 'USA', extra_info: ['Right on the hills','Next to the post box']," + + " phone_numbers: {'home': 2016778524, 'office': 2015790847}}," + + "('USA', 90209, 'Beverly Hills')\n" + + "SELECT * FROM zeppelin.users WHERE login='jdoe';"; + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo( - "login\taddresses\tage\tdeceased\tfirstname\tlast_update\tlastname\tlocation\n" + - "jdoe\t" + - "{street_number: 3, street_name: Beverly Hills Bld, zip_code: 90209, " + - "country: USA, extra_info: [Right on the hills, Next to the post box], " + - "phone_numbers: {home: 2016778524, office: 2015790847}}\tnull\t" + - "null\t" + - "John\t" + - "null\t" + - "DOE\t" + - "(USA, 90209, Beverly Hills)\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("login\taddresses\tage\tdeceased\tfirstname\tlast_update\tlastname\tlocation\n" + + "jdoe\t" + + "{street_number: 3, street_name: Beverly Hills Bld, zip_code: 90209, " + + "country: USA, extra_info: [Right on the hills, Next to the post box], " + + "phone_numbers: {home: 2016778524, office: 2015790847}}\tnull\t" + + "null\t" + + "John\t" + + "null\t" + + "DOE\t" + + "(USA, 90209, Beverly Hills)\n", actual.message().get(0).getData()); } @Test - public void should_exception_when_executing_unknown_bound_statement() { - //Given + void should_exception_when_executing_unknown_bound_statement() { + // Given String queries = "@bind[select_users]='jdoe'"; - //When + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()) - .isEqualTo("The statement 'select_users' can not be bound to values. " + - "Are you sure you did prepare it with @prepare[select_users] ?"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertEquals("The statement 'select_users' can not be bound to values. 
" + + "Are you sure you did prepare it with @prepare[select_users] ?", + actual.message().get(0).getData()); } @Test - public void should_extract_variable_from_statement() { - //Given + void should_extract_variable_from_statement() { + // Given AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null); GUI gui = new GUI(); gui.textbox("login", "hsue"); gui.textbox("age", "27"); InterpreterContext intrContext = InterpreterContext.builder() - .setParagraphTitle("Paragraph1") - .setAngularObjectRegistry(angularObjectRegistry) - .setGUI(gui) - .build(); + .setParagraphTitle("Paragraph1") + .setAngularObjectRegistry(angularObjectRegistry) + .setGUI(gui) + .build(); String queries = "@prepare[test_insert_with_variable]=" + - "INSERT INTO zeppelin.users(login,firstname,lastname,age) VALUES(?,?,?,?)\n" + - "@bind[test_insert_with_variable]='{{login=hsue}}','Helen','SUE',{{age=27}}\n" + - "SELECT firstname,lastname,age FROM zeppelin.users WHERE login='hsue';"; - //When + "INSERT INTO zeppelin.users(login,firstname,lastname,age) VALUES(?,?,?,?)\n" + + "@bind[test_insert_with_variable]='{{login=hsue}}','Helen','SUE',{{age=27}}\n" + + "SELECT firstname,lastname,age FROM zeppelin.users WHERE login='hsue';"; + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\tage\n" + - "Helen\tSUE\t27\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("firstname\tlastname\tage\nHelen\tSUE\t27\n", actual.message().get(0).getData()); } @Test - public void should_just_prepare_statement() { - //Given + void should_just_prepare_statement() { + // Given String queries = "@prepare[just_prepare]=SELECT name,country,styles " + - "FROM zeppelin.artists LIMIT 3"; + "FROM zeppelin.artists LIMIT 3"; final String expected = reformatHtml( - readTestResource("/scalate/NoResult.html")); + readTestResource("/scalate/NoResult.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_execute_bound_statement_with_no_bound_value() { - //Given + void should_execute_bound_statement_with_no_bound_value() { + // Given String queries = "@prepare[select_no_bound_value]=SELECT name,country,styles " + - "FROM zeppelin.artists LIMIT 3\n" + - "@bind[select_no_bound_value]"; + "FROM zeppelin.artists LIMIT 3\n" + + "@bind[select_no_bound_value]"; - //When + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("name\tcountry\tstyles\n" + - "Bogdan Raczynski\tPoland\t[Dance, Electro]\n" + - "Krishna Das\tUSA\t[Unknown]\n" + - "Sheryl Crow\tUSA\t[Classic, Rock, Country, Blues, Pop, Folk]\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("name\tcountry\tstyles\n" + + "Bogdan Raczynski\tPoland\t[Dance, Electro]\n" + + "Krishna Das\tUSA\t[Unknown]\n" + + "Sheryl Crow\tUSA\t[Classic, Rock, Country, Blues, Pop, Folk]\n", + actual.message().get(0).getData()); } @Test - public void should_parse_date_value() { - 
//Given + void should_parse_date_value() { + // Given String queries = "@prepare[parse_date]=INSERT INTO zeppelin.users(login,last_update) " + - "VALUES(?,?)\n" + - "@bind[parse_date]='last_update','2015-07-30 12:00:01'\n" + - "SELECT last_update FROM zeppelin.users WHERE login='last_update';"; - //When + "VALUES(?,?)\n" + + "@bind[parse_date]='last_update','2015-07-30 12:00:01'\n" + + "SELECT last_update FROM zeppelin.users WHERE login='last_update';"; + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).contains("last_update\n2015-07-30T12:00:01.000Z"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertTrue(actual.message().get(0).getData().contains("last_update\n2015-07-30T12:00:01.000Z"), + actual.message().get(0).getData()); } @Test - public void should_bind_null_value() { - //Given + void should_bind_null_value() { + // Given String queries = "@prepare[bind_null]=INSERT INTO zeppelin.users(login,firstname,lastname) " + - "VALUES(?,?,?)\n" + - "@bind[bind_null]='bind_null',null,'NULL'\n" + - "SELECT firstname,lastname FROM zeppelin.users WHERE login='bind_null';"; - //When + "VALUES(?,?,?)\n" + + "@bind[bind_null]='bind_null',null,'NULL'\n" + + "SELECT firstname,lastname FROM zeppelin.users WHERE login='bind_null';"; + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("firstname\tlastname\n" + - "null\tNULL\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("firstname\tlastname\nnull\tNULL\n", actual.message().get(0).getData()); } @Test - public void should_bind_boolean_value() { - //Given + void should_bind_boolean_value() { + // Given String queries = "@prepare[bind_boolean]=INSERT INTO zeppelin.users(login,deceased) " + - "VALUES(?,?)\n" + - "@bind[bind_boolean]='bind_bool',false\n" + - "SELECT login,deceased FROM zeppelin.users WHERE login='bind_bool';"; - //When + "VALUES(?,?)\n" + + "@bind[bind_boolean]='bind_bool',false\n" + + "SELECT login,deceased FROM zeppelin.users WHERE login='bind_bool';"; + // When final InterpreterResult actual = interpreter.interpret(queries, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message().get(0).getData()).isEqualTo("login\tdeceased\n" + - "bind_bool\tfalse\n"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("login\tdeceased\nbind_bool\tfalse\n", actual.message().get(0).getData()); } @Test - public void should_fail_when_executing_a_removed_prepared_statement() { - //Given + void should_fail_when_executing_a_removed_prepared_statement() { + // Given String prepareFirst = "@prepare[to_be_removed]=INSERT INTO zeppelin.users(login,deceased) " + - "VALUES(?,?)"; + "VALUES(?,?)"; interpreter.interpret(prepareFirst, intrContext); String removePrepared = "@remove_prepare[to_be_removed]\n" + - "@bind[to_be_removed]='bind_bool'"; + "@bind[to_be_removed]='bind_bool'"; - //When + // When final InterpreterResult actual = interpreter.interpret(removePrepared, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()).isEqualTo("The statement 'to_be_removed' can " + - "not be bound to values. 
Are you sure you did prepare it with " + - "@prepare[to_be_removed] ?"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertEquals("The statement 'to_be_removed' can " + + "not be bound to values. Are you sure you did prepare it with " + + "@prepare[to_be_removed] ?", actual.message().get(0).getData()); } @Test - public void should_display_statistics_for_non_select_statement() { - //Given + void should_display_statistics_for_non_select_statement() { + // Given String query = "USE zeppelin;\nCREATE TABLE IF NOT EXISTS no_select(id int PRIMARY KEY);"; final String rawResult = reformatHtml(readTestResource( - "/scalate/NoResultWithExecutionInfo.html")); + "/scalate/NoResultWithExecutionInfo.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); final int port = EmbeddedCassandraServerHelper.getNativeTransportPort(); final String address = EmbeddedCassandraServerHelper.getHost(); - //Then + // Then final String expected = rawResult.replaceAll("TRIED_HOSTS", address + ":" + port) - .replaceAll("QUERIED_HOSTS", address + ":" + port); + .replaceAll("QUERIED_HOSTS", address + ":" + port); - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_error_and_display_stack_trace() { - //Given + void should_error_and_display_stack_trace() { + // Given String query = "@consistency=THREE\n" + - "SELECT * FROM zeppelin.users LIMIT 3;"; - //When + "SELECT * FROM zeppelin.users LIMIT 3;"; + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()) - .contains("All 1 node(s) tried for the query failed"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertTrue( + actual.message().get(0).getData().contains("All 1 node(s) tried for the query failed"), + actual.message().get(0).getData()); } @Test - public void should_describe_cluster() { - //Given + void should_describe_cluster() { + // Given String query = "DESCRIBE CLUSTER;"; final String expected = reformatHtml( - readTestResource("/scalate/DescribeCluster.html")); + readTestResource("/scalate/DescribeCluster.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_describe_keyspaces() { - //Given + void should_describe_keyspaces() { + // Given String query = "DESCRIBE KEYSPACES;"; final String expected = reformatHtml( - readTestResource("/scalate/DescribeKeyspaces.html")); + readTestResource("/scalate/DescribeKeyspaces.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_describe_keyspace() { - //Given + void should_describe_keyspace() { + // 
Given String query = "DESCRIBE KEYSPACE live_data;"; final String expected = reformatHtml( - readTestResource("/scalate/DescribeKeyspace_live_data.html")); + readTestResource("/scalate/DescribeKeyspace_live_data.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - @Ignore - //TODO(n.a.) activate test when using Java 8 and C* 3.x - public void should_describe_function() throws Exception { - //Given + @Disabled("TODO(n.a.) activate test when using Java 8 and C* 3.x") + void should_describe_function() throws Exception { + // Given Properties properties = new Properties(); properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1"); - properties.setProperty(CASSANDRA_PORT, "9042"); + properties.setProperty(CASSANDRA_PORT, "9042"); Interpreter interpreter = new CassandraInterpreter(properties); interpreter.open(); String createFunction = "CREATE FUNCTION zeppelin.maxof(val1 int,val2 int) " + - "RETURNS NULL ON NULL INPUT " + - "RETURNS int " + - "LANGUAGE java " + - "AS $$" + - " return Math.max(val1, val2);\n" + - "$$;"; + "RETURNS NULL ON NULL INPUT " + + "RETURNS int " + + "LANGUAGE java " + + "AS $$" + + " return Math.max(val1, val2);\n" + + "$$;"; interpreter.interpret(createFunction, intrContext); String query = "DESCRIBE FUNCTION zeppelin.maxOf;"; - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(actual.message()).isEqualTo("xxxxx"); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals("xxxxx", actual.message()); } @Test - @Ignore - //TODO(n.a.) activate test when using Java 8 and C* 3.x - public void should_describe_aggregate() throws Exception { - //Given + @Disabled("TODO(n.a.) activate test when using Java 8 and C* 3.x") + void should_describe_aggregate() throws Exception { + // Given Properties properties = new Properties(); properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1"); - properties.setProperty(CASSANDRA_PORT, "9042"); + properties.setProperty(CASSANDRA_PORT, "9042"); Interpreter interpreter = new CassandraInterpreter(properties); interpreter.open(); final String query = "DESCRIBE AGGREGATES;"; - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); + // Then + assertEquals(Code.SUCCESS, actual.code()); } @Test - @Ignore - //TODO(n.a.) activate test when using Java 8 and C* 3.x - public void should_describe_materialized_view() throws Exception { - //Given + @Disabled("TODO(n.a.) 
activate test when using Java 8 and C* 3.x") + void should_describe_materialized_view() throws Exception { + // Given Properties properties = new Properties(); properties.setProperty(CASSANDRA_HOSTS, "127.0.0.1"); - properties.setProperty(CASSANDRA_PORT, "9042"); + properties.setProperty(CASSANDRA_PORT, "9042"); Interpreter interpreter = new CassandraInterpreter(properties); interpreter.open(); final String query = "DESCRIBE MATERIALIZED VIEWS;"; - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); + // Then + assertEquals(Code.SUCCESS, actual.code()); } @Test - public void should_describe_table() { - //Given + void should_describe_table() { + // Given String query = "DESCRIBE TABLE live_data.complex_table;"; final String expected = reformatHtml( - readTestResource("/scalate/DescribeTable_live_data_complex_table.html")); + readTestResource("/scalate/DescribeTable_live_data_complex_table.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_describe_udt() { - //Given + void should_describe_udt() { + // Given String query = "DESCRIBE TYPE live_data.address;"; final String expected = reformatHtml( - readTestResource("/scalate/DescribeType_live_data_address.html")); + readTestResource("/scalate/DescribeType_live_data_address.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_describe_udt_withing_logged_in_keyspace() { - //Given + void should_describe_udt_withing_logged_in_keyspace() { + // Given String query = "USE live_data;\n" + - "DESCRIBE TYPE address;"; + "DESCRIBE TYPE address;"; final String expected = reformatHtml(readTestResource( - "/scalate/DescribeType_live_data_address_within_current_keyspace.html")); + "/scalate/DescribeType_live_data_address_within_current_keyspace.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - public void should_describe_all_tables() { - //Given + void should_describe_all_tables() { + // Given String query = "DESCRIBE TABLES;"; final String expected = reformatHtml(readTestResource( - "/scalate/DescribeTables.html")); + "/scalate/DescribeTables.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } @Test - 
public void should_describe_all_udts() { - //Given + void should_describe_all_udts() { + // Given String query = "DESCRIBE TYPES;"; final String expected = reformatHtml(readTestResource( - "/scalate/DescribeTypes.html")); + "/scalate/DescribeTypes.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).isEqualTo(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertEquals(expected, reformatHtml(actual.message().get(0).getData())); } - @Test - public void should_error_describing_non_existing_table() { - //Given + void should_error_describing_non_existing_table() { + // Given String query = "USE system;\n" + - "DESCRIBE TABLE complex_table;"; + "DESCRIBE TABLE complex_table;"; - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()) - .contains("Cannot find table system.complex_table"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertTrue(actual.message().get(0).getData().contains("Cannot find table system.complex_table"), + actual.message().get(0).getData()); } @Test - public void should_error_describing_non_existing_udt() { - //Given + void should_error_describing_non_existing_udt() { + // Given String query = "USE system;\n" + - "DESCRIBE TYPE address;"; + "DESCRIBE TYPE address;"; - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.ERROR); - assertThat(actual.message().get(0).getData()).contains("Cannot find type system.address"); + // Then + assertEquals(Code.ERROR, actual.code()); + assertTrue(actual.message().get(0).getData().contains("Cannot find type system.address"), + actual.message().get(0).getData()); } @Test - public void should_show_help() { - //Given + void should_show_help() { + // Given String query = "HELP;"; final String expected = reformatHtml(readTestResource("/scalate/Help.html")); - //When + // When final InterpreterResult actual = interpreter.interpret(query, intrContext); - //Then - assertThat(actual.code()).isEqualTo(Code.SUCCESS); - assertThat(reformatHtml(actual.message().get(0).getData())).contains(expected); + // Then + assertEquals(Code.SUCCESS, actual.code()); + assertTrue(reformatHtml(actual.message().get(0).getData()).contains(expected), + reformatHtml(actual.message().get(0).getData())); } private static String reformatHtml(String rawHtml) { - return rawHtml - .replaceAll("\\s*\n\\s*", "") - .replaceAll(">\\s+<", "><") - .replaceAll("(?s)data-target=\"#[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "") - .replaceAll("(?s)id=\"[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "") - .replaceAll("AND memtable_flush_period_in_ms = 0", "") - .trim(); + return rawHtml + .replaceAll("\\s*\n\\s*", "") + .replaceAll(">\\s+<", "><") + .replaceAll("(?s)data-target=\"#[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "") + .replaceAll("(?s)id=\"[a-f0-9-]+(?:_asCQL|_indices_asCQL)?\"", "") + .replaceAll("AND memtable_flush_period_in_ms = 0", "") + .trim(); } private static String readTestResource(String testResource) { - StringBuilder builder = new StringBuilder(); - InputStream stream = testResource.getClass().getResourceAsStream(testResource); - - try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) { - String line; - while ((line 
= br.readLine()) != null) { - builder.append(line).append("\n"); - } - } catch (Exception ex) { - throw new RuntimeException(ex); + try { + return IOUtils.toString( + CassandraInterpreterTest.class.getResourceAsStream(testResource), + StandardCharsets.UTF_8); + } catch (IOException ex) { + throw new RuntimeException(ex); } - - return builder.toString(); } } diff --git a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java index fda915d2c47..c7f952c7d9f 100644 --- a/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java +++ b/cassandra/src/test/java/org/apache/zeppelin/cassandra/InterpreterLogicTest.java @@ -22,12 +22,17 @@ import static com.datastax.oss.driver.api.core.ConsistencyLevel.QUORUM; import static com.datastax.oss.driver.api.core.ConsistencyLevel.SERIAL; import static com.datastax.oss.driver.api.core.cql.BatchType.UNLOGGED; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.eq; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; import static java.util.Arrays.asList; @@ -37,16 +42,8 @@ import com.datastax.oss.driver.api.core.cql.BatchableStatement; import com.datastax.oss.driver.api.core.cql.SimpleStatement; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; import org.mockito.Answers; import org.mockito.ArgumentCaptor; -import org.mockito.Captor; -import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; - import java.time.Instant; import java.time.ZoneOffset; import java.time.ZonedDateTime; @@ -71,320 +68,327 @@ import org.apache.zeppelin.display.ui.OptionInput.ParamOption; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; -@RunWith(MockitoJUnitRunner.class) -public class InterpreterLogicTest { - @Rule - public ExpectedException expectedException = ExpectedException.none(); +class InterpreterLogicTest { - @Mock(answer = Answers.RETURNS_DEEP_STUBS) private InterpreterContext intrContext; - - @Mock private CqlSession session; - final InterpreterLogic helper = new InterpreterLogic(session, new Properties()); + @BeforeEach + public void setup() { + intrContext = mock(InterpreterContext.class, Answers.RETURNS_DEEP_STUBS); + session = mock(CqlSession.class); + } - @Captor - ArgumentCaptor optionsCaptor; + final InterpreterLogic helper = new InterpreterLogic(session, new Properties()); @Test - public void should_parse_input_string_block() { - //Given + void should_parse_input_string_block() { + // Given String input = "SELECT * FROM users LIMIT 10;"; - //When + // When final List anyBlocks = this.toJavaList(helper.parseInput(input)); - //Then - 
assertThat(anyBlocks).hasSize(1); - assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class); + // Then + assertEquals(1, anyBlocks.size()); + assertTrue(anyBlocks.get(0) instanceof SimpleStm); } @Test - public void should_parse_input_string_block_with_comment_dash() { - //Given + void should_parse_input_string_block_with_comment_dash() { + // Given String input = "SELECT * FROM users LIMIT 10; -- this is a comment"; - //When + // When final List anyBlocks = this.toJavaList(helper.parseInput(input)); - //Then - assertThat(anyBlocks).hasSize(2); - assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class); - assertThat(anyBlocks.get(1)).isInstanceOf(TextBlockHierarchy.Comment.class); + // Then + assertEquals(2, anyBlocks.size()); + assertTrue(anyBlocks.get(0) instanceof SimpleStm); + assertTrue(anyBlocks.get(1) instanceof TextBlockHierarchy.Comment); } @Test - public void should_parse_input_string_block_with_comment_slash() { - //Given + void should_parse_input_string_block_with_comment_slash() { + // Given String input = "SELECT * FROM users LIMIT 10; // this is a comment"; - //When + // When final List anyBlocks = this.toJavaList(helper.parseInput(input)); - //Then - assertThat(anyBlocks).hasSize(2); - assertThat(anyBlocks.get(0)).isInstanceOf(SimpleStm.class); - assertThat(anyBlocks.get(1)).isInstanceOf(TextBlockHierarchy.Comment.class); + // Then + assertEquals(2, anyBlocks.size()); + assertTrue(anyBlocks.get(0) instanceof SimpleStm); + assertTrue(anyBlocks.get(1) instanceof TextBlockHierarchy.Comment); } @Test - public void should_exception_while_parsing_input() { - //Given + void should_exception_while_parsing_input() { + // Given String input = "SELECT * FROM users LIMIT 10"; - //When - expectedException.expect(InterpreterException.class); - expectedException.expectMessage("Error parsing input:\n" + - "\t'SELECT * FROM users LIMIT 10'\n" + - "Did you forget to add ; (semi-colon) at the end of each CQL statement ?"); + // When + InterpreterException ex = assertThrows(InterpreterException.class, () -> { + helper.parseInput(input); + }); + + assertEquals("Error parsing input:\n" + + "\t'SELECT * FROM users LIMIT 10'\n" + + "Did you forget to add ; (semi-colon) at the end of each CQL statement ?", ex.getMessage()); - helper.parseInput(input); } @Test - public void should_extract_variable_and_default_value() { - //Given + void should_extract_variable_and_default_value() { + // Given AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null); when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry); - when(intrContext.getGui().input("table", "zeppelin.demo")).thenReturn("zeppelin.demo"); - when(intrContext.getGui().input("id", "'John'")).thenReturn("'John'"); + when(intrContext.getGui().textbox("table", "zeppelin.demo")).thenReturn("zeppelin.demo"); + when(intrContext.getGui().textbox("id", "'John'")).thenReturn("'John'"); - //When + // When final String actual = helper.maybeExtractVariables( - "SELECT * FROM {{table=zeppelin.demo}} WHERE id={{id='John'}}", intrContext); + "SELECT * FROM {{table=zeppelin.demo}} WHERE id={{id='John'}}", intrContext); - //Then - assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo WHERE id='John'"); + // Then + assertEquals("SELECT * FROM zeppelin.demo WHERE id='John'", actual); } @Test - public void should_extract_variable_and_choices() { - //Given + void should_extract_variable_and_choices() { + // Given AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null); 
when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry); - when(intrContext.getGui().select(eq("name"), optionsCaptor.capture(), eq("'Paul'"))) - .thenReturn("'Jack'"); - - //When + ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(ParamOption[].class); + when(intrContext.getGui().select(any(), any(), any())).thenReturn("'Jack'"); + // When final String actual = helper.maybeExtractVariables( - "SELECT * FROM zeppelin.artists WHERE name={{name='Paul'|'Jack'|'Smith'}}", - intrContext); - - //Then - assertThat(actual).isEqualTo("SELECT * FROM zeppelin.artists WHERE name='Jack'"); + "SELECT * FROM zeppelin.artists WHERE name={{name='Paul'|'Jack'|'Smith'}}", + intrContext); + verify(intrContext.getGui()).select(eq("name"), optionsCaptor.capture(), eq("'Paul'")); + // Then + assertEquals("SELECT * FROM zeppelin.artists WHERE name='Jack'", actual); final List paramOptions = asList(optionsCaptor.getValue()); - assertThat(paramOptions.get(0).getValue()).isEqualTo("'Paul'"); - assertThat(paramOptions.get(1).getValue()).isEqualTo("'Jack'"); - assertThat(paramOptions.get(2).getValue()).isEqualTo("'Smith'"); + assertEquals("'Paul'", paramOptions.get(0).getValue()); + assertEquals("'Jack'", paramOptions.get(1).getValue()); + assertEquals("'Smith'", paramOptions.get(2).getValue()); } @Test - public void should_extract_no_variable() { - //Given + void should_extract_no_variable() { + // Given GUI gui = mock(GUI.class); when(intrContext.getGui()).thenReturn(gui); - //When + // When final String actual = helper.maybeExtractVariables("SELECT * FROM zeppelin.demo", intrContext); - //Then - verifyZeroInteractions(gui); - assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo"); + // Then + verifyNoInteractions(gui); + assertEquals("SELECT * FROM zeppelin.demo", actual); } @Test - public void should_extract_variable_from_angular_object_registry() { - //Given + void should_extract_variable_from_angular_object_registry() { + // Given AngularObjectRegistry angularObjectRegistry = new AngularObjectRegistry("cassandra", null); angularObjectRegistry.add("id", "from_angular_registry", "noteId", "paragraphId"); when(intrContext.getAngularObjectRegistry()).thenReturn(angularObjectRegistry); when(intrContext.getNoteId()).thenReturn("noteId"); when(intrContext.getParagraphId()).thenReturn("paragraphId"); - //When + // When final String actual = helper.maybeExtractVariables( - "SELECT * FROM zeppelin.demo WHERE id='{{id=John}}'", intrContext); + "SELECT * FROM zeppelin.demo WHERE id='{{id=John}}'", intrContext); - //Then - assertThat(actual).isEqualTo("SELECT * FROM zeppelin.demo WHERE id='from_angular_registry'"); + // Then + assertEquals("SELECT * FROM zeppelin.demo WHERE id='from_angular_registry'", actual); verify(intrContext, never()).getGui(); } @Test public void should_error_if_incorrect_variable_definition() { - //Given - - //When - expectedException.expect(ParsingException.class); - expectedException.expectMessage("Invalid bound variable definition for " + - "'{{table?zeppelin.demo}}' in 'SELECT * FROM {{table?zeppelin.demo}} " + - "WHERE id={{id='John'}}'. It should be of form 'variable=defaultValue'"); + // Given + + // When + ParsingException thrown = assertThrows(ParsingException.class, () -> { + // Then + helper.maybeExtractVariables("SELECT * FROM {{table?zeppelin.demo}} WHERE id={{id='John'}}", + intrContext); + }); + assertEquals("Invalid bound variable definition for " + + "'{{table?zeppelin.demo}}' in 'SELECT * FROM {{table?zeppelin.demo}} " + + "WHERE id={{id='John'}}'. 
It should be of form 'variable=defaultValue' " + + "or 'variable=value1|value2|...|valueN'", + thrown.getMessage()); - //Then - helper.maybeExtractVariables("SELECT * FROM {{table?zeppelin.demo}} WHERE id={{id='John'}}", - intrContext); } @Test - public void should_extract_consistency_option() { - //Given + void should_extract_consistency_option() { + // Given List options = Arrays.asList(new Consistency(ALL), - new Consistency(ONE)); + new Consistency(ONE)); - //When + // When final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options)); - //Then - assertThat(actual.consistency().get()).isEqualTo(ALL); + // Then + assertEquals(ALL, actual.consistency().get()); } @Test - public void should_extract_serial_consistency_option() { - //Given + void should_extract_serial_consistency_option() { + // Given List options = Arrays.asList(new SerialConsistency(SERIAL), - new SerialConsistency(LOCAL_SERIAL)); + new SerialConsistency(LOCAL_SERIAL)); - //When + // When final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options)); - //Then - assertThat(actual.serialConsistency().get()).isEqualTo(SERIAL); + // Then + assertEquals(SERIAL, actual.serialConsistency().get()); } @Test - public void should_extract_timestamp_option() { - //Given + void should_extract_timestamp_option() { + // Given List options = Arrays.asList(new Timestamp(123L), - new Timestamp(456L)); + new Timestamp(456L)); - //When + // When final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options)); - //Then - assertThat(actual.timestamp().get()).isEqualTo(123L); + // Then + assertEquals(123L, actual.timestamp().get()); } @Test - public void should_extract_request_timeout_option() { - //Given + void should_extract_request_timeout_option() { + // Given List options = Collections.singletonList(new RequestTimeOut(100)); - //When + // When final CassandraQueryOptions actual = helper.extractQueryOptions(toScalaList(options)); - //Then - assertThat(actual.requestTimeOut().get()).isEqualTo(100); + // Then + assertEquals(100, actual.requestTimeOut().get()); } @Test - public void should_generate_simple_statement() { - //Given + void should_generate_simple_statement() { + // Given String input = "SELECT * FROM users LIMIT 10;"; CassandraQueryOptions options = new CassandraQueryOptions(Option.apply(QUORUM), - Option.empty(), - Option.empty(), - Option.empty(), - Option.empty()); + Option.empty(), + Option.empty(), + Option.empty(), + Option.empty()); - //When + // When final SimpleStatement actual = helper.generateSimpleStatement(new SimpleStm(input), options, - intrContext); + intrContext); - //Then - assertThat(actual).isNotNull(); - assertThat(actual.getQuery()).isEqualTo("SELECT * FROM users LIMIT 10;"); - assertThat(actual.getConsistencyLevel()).isSameAs(QUORUM); + // Then + assertNotNull(actual); + assertEquals("SELECT * FROM users LIMIT 10;", actual.getQuery()); + assertSame(QUORUM, actual.getConsistencyLevel()); } @Test - public void should_generate_batch_statement() { - //Given + void should_generate_batch_statement() { + // Given SimpleStatement st1 = SimpleStatement.newInstance("SELECT * FROM users LIMIT 10;"); SimpleStatement st2 = SimpleStatement.newInstance("INSERT INTO users(id) VALUES(10);"); SimpleStatement st3 = SimpleStatement.newInstance( - "UPDATE users SET name = 'John DOE' WHERE id=10;"); + "UPDATE users SET name = 'John DOE' WHERE id=10;"); CassandraQueryOptions options = new CassandraQueryOptions(Option.apply(QUORUM), - Option.empty(), - 
Option.empty(), - Option.empty(), - Option.empty()); + Option.empty(), + Option.empty(), + Option.empty(), + Option.empty()); - //When + // When BatchStatement actual = helper.generateBatchStatement(UNLOGGED, options, - toScalaList(asList(st1, st2, st3))); + toScalaList(asList(st1, st2, st3))); - //Then - assertThat(actual).isNotNull(); + // Then + assertNotNull(actual); List statements = new ArrayList<>(); - for (BatchableStatement b: actual) { + for (BatchableStatement b : actual) { statements.add(b); } - assertThat(statements).hasSize(3); - assertThat(statements.get(0)).isSameAs(st1); - assertThat(statements.get(1)).isSameAs(st2); - assertThat(statements.get(2)).isSameAs(st3); - assertThat(actual.getConsistencyLevel()).isSameAs(QUORUM); + assertEquals(3, statements.size()); + assertSame(st1, statements.get(0)); + assertSame(st2, statements.get(1)); + assertSame(st3, statements.get(2)); + assertSame(QUORUM, actual.getConsistencyLevel()); } @Test - public void should_parse_bound_values() { - //Given + void should_parse_bound_values() { + // Given String bs = "'jdoe',32,'John DOE',null, true, '2014-06-12 34:00:34'"; - //When + // When final List actual = this.toJavaList(helper.parseBoundValues("ps", bs)); - //Then - assertThat(actual).containsExactly("'jdoe'", "32", "'John DOE'", - "null", "true", "2014-06-12 34:00:34"); + // Then + assertEquals("'jdoe'", actual.get(0)); + assertEquals("32", actual.get(1)); + assertEquals("'John DOE'", actual.get(2)); + assertEquals("null", actual.get(3)); + assertEquals("true", actual.get(4)); + assertEquals("2014-06-12 34:00:34", actual.get(5)); } @Test - public void should_parse_simple_date() { - //Given + void should_parse_simple_date() { + // Given String dateString = "2015-07-30 12:00:01"; - //When + // When final Instant actual = helper.parseDate(dateString); - //Then + // Then ZonedDateTime dt = actual.atZone(ZoneOffset.UTC); - assertThat(dt.getLong(ChronoField.YEAR_OF_ERA)).isEqualTo(2015); - assertThat(dt.getLong(ChronoField.MONTH_OF_YEAR)).isEqualTo(7); - assertThat(dt.getLong(ChronoField.DAY_OF_MONTH)).isEqualTo(30); - assertThat(dt.getLong(ChronoField.HOUR_OF_DAY)).isEqualTo(12); - assertThat(dt.getLong(ChronoField.MINUTE_OF_HOUR)).isEqualTo(0); - assertThat(dt.getLong(ChronoField.SECOND_OF_MINUTE)).isEqualTo(1); + assertEquals(2015, dt.getLong(ChronoField.YEAR_OF_ERA)); + assertEquals(7, dt.getLong(ChronoField.MONTH_OF_YEAR)); + assertEquals(30, dt.getLong(ChronoField.DAY_OF_MONTH)); + assertEquals(12, dt.getLong(ChronoField.HOUR_OF_DAY)); + assertEquals(0, dt.getLong(ChronoField.MINUTE_OF_HOUR)); + assertEquals(1, dt.getLong(ChronoField.SECOND_OF_MINUTE)); } @Test - public void should_parse_accurate_date() { - //Given + void should_parse_accurate_date() { + // Given String dateString = "2015-07-30 12:00:01.123"; - //When + // When final Instant actual = helper.parseDate(dateString); - //Then + // Then ZonedDateTime dt = actual.atZone(ZoneOffset.UTC); - assertThat(dt.getLong(ChronoField.YEAR_OF_ERA)).isEqualTo(2015); - assertThat(dt.getLong(ChronoField.MONTH_OF_YEAR)).isEqualTo(7); - assertThat(dt.getLong(ChronoField.DAY_OF_MONTH)).isEqualTo(30); - assertThat(dt.getLong(ChronoField.HOUR_OF_DAY)).isEqualTo(12); - assertThat(dt.getLong(ChronoField.MINUTE_OF_HOUR)).isEqualTo(0); - assertThat(dt.getLong(ChronoField.SECOND_OF_MINUTE)).isEqualTo(1); - assertThat(dt.getLong(ChronoField.MILLI_OF_SECOND)).isEqualTo(123); + assertEquals(2015, dt.getLong(ChronoField.YEAR_OF_ERA)); + assertEquals(7, dt.getLong(ChronoField.MONTH_OF_YEAR)); + 
assertEquals(30, dt.getLong(ChronoField.DAY_OF_MONTH)); + assertEquals(12, dt.getLong(ChronoField.HOUR_OF_DAY)); + assertEquals(0, dt.getLong(ChronoField.MINUTE_OF_HOUR)); + assertEquals(1, dt.getLong(ChronoField.SECOND_OF_MINUTE)); + assertEquals(123, dt.getLong(ChronoField.MILLI_OF_SECOND)); } - private scala.collection.immutable.List toScalaList(java.util.List list) { + private scala.collection.immutable.List toScalaList(java.util.List list) { return scala.collection.JavaConversions.collectionAsScalaIterable(list).toList(); } - private java.util.List toJavaList(scala.collection.immutable.List list){ + private java.util.List toJavaList(scala.collection.immutable.List list) { return scala.collection.JavaConversions.seqAsJavaList(list); } } diff --git a/cassandra/src/test/resources/scalate/DescribeKeyspaces.html b/cassandra/src/test/resources/scalate/DescribeKeyspaces.html index ac48bd34117..f06b9406c3f 100644 --- a/cassandra/src/test/resources/scalate/DescribeKeyspaces.html +++ b/cassandra/src/test/resources/scalate/DescribeKeyspaces.html @@ -1 +1 @@ -


     Test Cluster

    Partitioner
    org.apache.cassandra.dht.Murmur3Partitioner

    Keyspaces
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.LocalStrategy'}    true
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    true
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '3'}    true
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.LocalStrategy'}    true
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '2'}    true
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    \ No newline at end of file +


     Test Cluster

    Partitioner
    org.apache.cassandra.dht.Murmur3Partitioner

    Keyspaces
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    Replication    Durable Writes
    {'class' : 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor' : '1'}    false
    \ No newline at end of file diff --git a/cassandra/src/test/resources/scalate/DescribeTables.html b/cassandra/src/test/resources/scalate/DescribeTables.html index 05992914d3c..cba4b82536e 100644 --- a/cassandra/src/test/resources/scalate/DescribeTables.html +++ b/cassandra/src/test/resources/scalate/DescribeTables.html @@ -1 +1 @@ -


    Tables
    complex_table
    sensor_data
    stations
    Tables
    "IndexInfo"
    available_ranges
    batches
    batchlog
    built_views
    compaction_history
    hints
    local
    paxos
    peer_events
    peers
    prepared_statements
    range_xfers
    size_estimates
    sstable_activity
    transferred_ranges
    views_builds_in_progress
    Tables
    resource_role_permissons_index
    role_members
    role_permissions
    roles
    Tables
    parent_repair_history
    repair_history
    view_build_status
    Tables
    aggregates
    columns
    dropped_columns
    functions
    indexes
    keyspaces
    tables
    triggers
    types
    views
    Tables
    events
    sessions
    Tables
    artists
    no_select
    prepared
    test_format
    ts
    users
    \ No newline at end of file +


    Tables
    complex_table
    sensor_data
    stations
    Tables
    artists
    no_select
    prepared
    test_format
    ts
    users
    \ No newline at end of file diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala index de14c880d55..b8ed391c7c8 100644 --- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala +++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/BoundValuesParserTest.scala @@ -16,11 +16,11 @@ */ package org.apache.zeppelin.cassandra -import org.scalatest.{Matchers, BeforeAndAfterEach, FlatSpec} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.flatspec._ +import org.scalatest.matchers.should.Matchers._ -class BoundValuesParserTest extends FlatSpec -with BeforeAndAfterEach -with Matchers { +class BoundValuesParserTest extends AnyFlatSpec with BeforeAndAfterEach { val parser = new BoundValuesParser diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala index bf86227929c..3b0ae8f4656 100644 --- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala +++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/CqlFormatterTest.scala @@ -23,13 +23,13 @@ import java.util.Properties import com.datastax.oss.driver.api.core.`type`.DataTypes import com.datastax.oss.driver.api.core.`type`.codec.registry.CodecRegistry -import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.flatspec._ +import org.scalatest.matchers.should.Matchers._ import scala.collection.JavaConverters._ -class CqlFormatterTest extends FlatSpec - with BeforeAndAfterEach - with Matchers { +class CqlFormatterTest extends AnyFlatSpec with BeforeAndAfterEach { val longVal: java.lang.Long = java.lang.Long.valueOf(12345678901L) val floatVal: java.lang.Float = java.lang.Float.valueOf(123.456789f) diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala index 006fc144b2d..2dcfe7e4e36 100644 --- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala +++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/EnhancedSessionTest.scala @@ -18,9 +18,9 @@ package org.apache.zeppelin.cassandra import com.datastax.oss.driver.api.core.cql.{BatchStatement, BatchType, SimpleStatement} -import org.scalatest.FlatSpec +import org.scalatest.flatspec._ -class EnhancedSessionTest extends FlatSpec { +class EnhancedSessionTest extends AnyFlatSpec { "Query" should "be detected as DDL for create" in { assertResult(true){ diff --git a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala index 19afafcbefe..794547f80f3 100644 --- a/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala +++ b/cassandra/src/test/scala/org/apache/zeppelin/cassandra/ParagraphParserTest.scala @@ -19,19 +19,16 @@ package org.apache.zeppelin.cassandra import com.datastax.oss.driver.api.core.{ConsistencyLevel, CqlSession} import com.datastax.oss.driver.api.core.cql.{BatchType, PreparedStatement} import org.apache.zeppelin.interpreter.InterpreterException -import org.scalatest.mock.MockitoSugar -import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} +import org.mockito.Mockito +import org.scalatest.BeforeAndAfterEach +import 
org.scalatest.flatspec._ +import org.scalatest.matchers.should.Matchers._ import org.apache.zeppelin.cassandra.ParagraphParser._ import org.apache.zeppelin.cassandra.TextBlockHierarchy._ -import scala.Option +class ParagraphParserTest extends AnyFlatSpec with BeforeAndAfterEach { -class ParagraphParserTest extends FlatSpec - with BeforeAndAfterEach - with Matchers - with MockitoSugar { - - val session: CqlSession = mock[CqlSession] + val session: CqlSession = Mockito.mock[CqlSession](classOf[CqlSession]) val preparedStatements:collection.mutable.Map[String,PreparedStatement] = collection.mutable.Map() val parser: ParagraphParser = new ParagraphParser() diff --git a/conf/interpreter-list b/conf/interpreter-list index 4be4d7ddd10..8897a48b60a 100644 --- a/conf/interpreter-list +++ b/conf/interpreter-list @@ -17,32 +17,20 @@ # # [name] [maven artifact] [description] -alluxio org.apache.zeppelin:zeppelin-alluxio:0.10.0 Alluxio interpreter -angular org.apache.zeppelin:zeppelin-angular:0.10.0 HTML and AngularJS view rendering -beam org.apache.zeppelin:zeppelin-beam:0.10.0 Beam interpreter -bigquery org.apache.zeppelin:zeppelin-bigquery:0.10.0 BigQuery interpreter -cassandra org.apache.zeppelin:zeppelin-cassandra:0.10.0 Cassandra interpreter -elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.10.0 Elasticsearch interpreter -file org.apache.zeppelin:zeppelin-file:0.10.0 HDFS file interpreter -flink org.apache.zeppelin:zeppelin-flink:0.10.0 Flink interpreter -geode org.apache.zeppelin:zeppelin-geode:0.10.0 Apache Geode interpreter -groovy org.apache.zeppelin:zeppelin-groovy:0.10.0 Groovy interpreter -hazelcastjet org.apache.zeppelin:zeppelin-hazelcastjet:0.10.0 Hazelcast Jet interpreter -hbase org.apache.zeppelin:zeppelin-hbase:0.10.0 Hbase interpreter -ignite org.apache.zeppelin:zeppelin-ignite:0.10.0 Ignite interpreter -java org.apache.zeppelin:zeppelin-java:0.10.0 Java interpreter -jdbc org.apache.zeppelin:zeppelin-jdbc:0.10.0 Jdbc interpreter -kotlin org.apache.zeppelin:zeppelin-kotlin:0.10.0 Kotlin interpreter -kylin org.apache.zeppelin:zeppelin-kylin:0.10.0 Kylin interpreter -lens org.apache.zeppelin:zeppelin-lens:0.10.0 Lens interpreter -livy org.apache.zeppelin:zeppelin-livy:0.10.0 Livy interpreter -md org.apache.zeppelin:zeppelin-markdown:0.10.0 Markdown support -neo4j org.apache.zeppelin:zeppelin-neo4j:0.10.0 Neo4j interpreter -pig org.apache.zeppelin:zeppelin-pig:0.10.0 Pig interpreter -python org.apache.zeppelin:zeppelin-python:0.10.0 Python interpreter -sap org.apache.zeppelin:zeppelin-sap:0.10.0 SAP Support -scalding org.apache.zeppelin:zeppelin-scalding_2.0.10:0.10.0 Scalding interpreter -scio org.apache.zeppelin:zeppelin-scio:0.10.0 Scio interpreter -shell org.apache.zeppelin:zeppelin-shell:0.10.0 Shell command -sparql org.apache.zeppelin:zeppelin-sparql:0.10.0 Sparql interpreter -submarine org.apache.zeppelin:zeppelin-submarine:0.10.0 Submarine interpreter +alluxio org.apache.zeppelin:zeppelin-alluxio:0.12.0-SNAPSHOT Alluxio interpreter +angular org.apache.zeppelin:zeppelin-angular:0.12.0-SNAPSHOT HTML and AngularJS view rendering +bigquery org.apache.zeppelin:zeppelin-bigquery:0.12.0-SNAPSHOT BigQuery interpreter +cassandra org.apache.zeppelin:zeppelin-cassandra:0.12.0-SNAPSHOT Cassandra interpreter +elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.12.0-SNAPSHOT Elasticsearch interpreter +file org.apache.zeppelin:zeppelin-file:0.12.0-SNAPSHOT HDFS file interpreter +flink org.apache.zeppelin:zeppelin-flink:0.12.0-SNAPSHOT Flink interpreter +groovy 
org.apache.zeppelin:zeppelin-groovy:0.12.0-SNAPSHOT Groovy interpreter +hbase org.apache.zeppelin:zeppelin-hbase:0.12.0-SNAPSHOT Hbase interpreter +java org.apache.zeppelin:zeppelin-java:0.12.0-SNAPSHOT Java interpreter +jdbc org.apache.zeppelin:zeppelin-jdbc:0.12.0-SNAPSHOT Jdbc interpreter +livy org.apache.zeppelin:zeppelin-livy:0.12.0-SNAPSHOT Livy interpreter +md org.apache.zeppelin:zeppelin-markdown:0.12.0-SNAPSHOT Markdown support +neo4j org.apache.zeppelin:zeppelin-neo4j:0.12.0-SNAPSHOT Neo4j interpreter +python org.apache.zeppelin:zeppelin-python:0.12.0-SNAPSHOT Python interpreter +shell org.apache.zeppelin:zeppelin-shell:0.12.0-SNAPSHOT Shell command +sparql org.apache.zeppelin:zeppelin-sparql:0.12.0-SNAPSHOT Sparql interpreter diff --git a/conf/shiro.ini.template b/conf/shiro.ini.template index 13db835a178..363b222334f 100644 --- a/conf/shiro.ini.template +++ b/conf/shiro.ini.template @@ -50,12 +50,6 @@ user3 = password4, role2 #pamRealm=org.apache.zeppelin.realm.PamRealm #pamRealm.service=sshd -### A sample for configuring ZeppelinHub Realm -#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm -## Url of ZeppelinHub -#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com -#securityManager.realms = $zeppelinHubRealm - ## A same for configuring Knox SSO Realm #knoxJwtRealm = org.apache.zeppelin.realm.jwt.KnoxJwtRealm #knoxJwtRealm.providerUrl = https://domain.example.com/ diff --git a/conf/zeppelin-env.cmd.template b/conf/zeppelin-env.cmd.template index 83b610e07a3..15c88fd4ca8 100644 --- a/conf/zeppelin-env.cmd.template +++ b/conf/zeppelin-env.cmd.template @@ -64,7 +64,7 @@ REM however, it is not encouraged when you can define SPARK_HOME REM REM Options read in YARN client mode REM set HADOOP_CONF_DIR REM yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. -REM Pyspark (supported with Spark 1.2.1 and above) +REM Pyspark (supported with Spark 3.3 and above) REM To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI REM set PYSPARK_PYTHON REM path to the python command. must be the same path on the driver(Zeppelin) and all workers. REM set PYTHONPATH @@ -75,9 +75,3 @@ REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default. REM set ZEPPELIN_SPARK_IMPORTIMPLICIT REM Import implicits, UDF collection, and sql if set true. true by default. REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of Spark SQL result to display. 1000 by default. - -REM ZeppelinHub connection configuration -REM -REM set ZEPPELINHUB_API_ADDRESS REM Refers to the address of the ZeppelinHub service in use -REM set ZEPPELINHUB_API_TOKEN REM Refers to the Zeppelin instance token of the user -REM set ZEPPELINHUB_USER_KEY REM Optional, when using Zeppelin with authentication. diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template index 7c4a38b7cf7..e27a688becd 100644 --- a/conf/zeppelin-env.sh.template +++ b/conf/zeppelin-env.sh.template @@ -87,7 +87,7 @@ ## # Options read in YARN client mode # export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. 
-# Pyspark (supported with Spark 1.2.1 and above) +# Pyspark (supported with Spark 3.3 and above) # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI # export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers. # export PYTHONPATH @@ -107,11 +107,6 @@ # export HBASE_HOME= # (require) Under which HBase scripts and configuration should be # export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml -#### ZeppelinHub connection configuration #### -# export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use -# export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user -# export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication. - #### Zeppelin impersonation configuration # export ZEPPELIN_IMPERSONATE_CMD # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c ' # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index 53b0144dcdc..b1143c35283 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -22,7 +22,7 @@ zeppelin.server.addr 127.0.0.1 - Server binding address + Server binding address. If you cannot connect to your web browser on WSL or Windows, change 127.0.0.1 to 0.0.0.0. It, however, causes security issues when you open your machine to the public @@ -91,6 +91,12 @@ Enable collaborative mode + + zeppelin.notebook.versioned.mode.enable + true + Value to enable/disable version control support in Notes + + - - - @@ -426,8 +423,8 @@ zeppelin.interpreter.connect.timeout - 60000 - Interpreter process connect timeout in msec. + 600s + Interpreter process connect timeout. Default time unit is msec. @@ -578,14 +575,14 @@ zeppelin.interpreter.lifecyclemanager.timeout.checkinterval - 60000 - Milliseconds of the interval to checking whether interpreter is time out + 1m + Interval to checking whether interpreter is time out zeppelin.interpreter.lifecyclemanager.timeout.threshold - 3600000 - Milliseconds of the interpreter timeout threshold, by default it is 1 hour + 1h + Interpreter timeout threshold, by default it is 1 hour --> diff --git a/dev/change_scala_version.sh b/dev/change_scala_version.sh deleted file mode 100755 index 0ccfe7e263f..00000000000 --- a/dev/change_scala_version.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -e - -VALID_VERSIONS=( 2.10 2.11 ) - -usage() { - echo "Usage: $(basename $0) [-h|--help] -where : - -h| --help Display this help text - valid version values : ${VALID_VERSIONS[*]} -" 1>&2 - exit 1 -} - -if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then - usage -fi - -TO_VERSION="$1" - -check_scala_version() { - for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done - echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2 - exit 1 -} - -check_scala_version "${TO_VERSION}" - -if [ "${TO_VERSION}" = "2.11" ]; then - FROM_VERSION="2.10" - SCALA_LIB_VERSION="2.11.7" -else - FROM_VERSION="2.11" - SCALA_LIB_VERSION="2.10.5" -fi - -sed_i() { - sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2" -} - -export -f sed_i - -BASEDIR=$(dirname $0)/.. -find "${BASEDIR}" -name 'pom.xml' -not -path '*target*' -print \ - -exec bash -c "sed_i 's/\(artifactId.*\)_'${FROM_VERSION}'/\1_'${TO_VERSION}'/g' {}" \; - -# update in parent POM -# Match any scala binary version to ensure idempotency -sed_i '1,/[0-9]*\.[0-9]*[0-9]*\.[0-9]*'${TO_VERSION}' in parent POM -# This is to make variables in leaf pom to be substituted to real value when flattened-pom is created. -# maven-flatten plugin doesn't take properties defined under profile even if scala-2.11/scala-2.10 is activated via -Pscala-2.11/-Pscala-2.10, -# and use default defined properties to create flatten pom. -sed_i '1,/[0-9]*\.[0-9]*\.[0-9]*[0-9]*\.[0-9]*\.[0-9]*'${SCALA_LIB_VERSION}' /dev/null 2>&1 +./mvnw versions:set -DnewVersion="${TO_VERSION}" -DgenerateBackupPoms=false > /dev/null 2>&1 # Change version in example and package files sed -i '' 's/-'"${FROM_VERSION}"'.jar",/-'"${TO_VERSION}"'.jar",/g' zeppelin-examples/zeppelin-example-clock/zeppelin-example-clock.json @@ -61,6 +61,12 @@ sed -i '' 's/"version": "'"${FROM_VERSION}"'",/"version": "'"${TO_VERSION}"'",/g # Change version in Dockerfile sed -i '' 's/Z_VERSION="'"${FROM_VERSION}"'"/Z_VERSION="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin/bin/Dockerfile +sed -i '' 's/version="'"${FROM_VERSION}"'"/version="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin-interpreter/Dockerfile +sed -i '' 's/version="'"${FROM_VERSION}"'"/version="'"${TO_VERSION}"'"/g' scripts/docker/zeppelin-server/Dockerfile + +# Change version in Kubernetes yaml +sed -i '' 's/zeppelin-interpreter:${FROM_VERSION}/zeppelin-interpreter:${TO_VERSION}/g' k8s/zeppelin-server.yaml +sed -i '' 's/zeppelin-server:${FROM_VERSION}/zeppelin-server:${TO_VERSION}/g' k8s/zeppelin-server.yaml # Change docker image version in configuration sed -i '' sed 's/zeppelin:'"${OLD_VERSION}"'/zeppelin:'"${NEW_VERSION}"'/g' conf/zeppelin-site.xml.template diff --git a/dev/checkout_zeppelin_pr.sh b/dev/checkout_zeppelin_pr.sh new file mode 100755 index 00000000000..08a1a00ec7e --- /dev/null +++ b/dev/checkout_zeppelin_pr.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This utility creates a local branch PR_ from specified pull request, +# to help the test and review. +# +# Prerequisites: +# Add Apache Zeppelin as remote repo, with name "apache" (or something else +# defined by environment variable APACHE_ZEPPELIN_REMOTE_REPO_NAME) +# +# git remote add apache git@github.com:apache/zeppelin.git +# + +set -o pipefail +set -e +set -x + +APACHE_ZEPPELIN_REMOTE_REPO_NAME=${APACHE_ZEPPELIN_REMOTE_REPO_NAME:-"apache"} + +function usage { + echo "Usage: dev/checkout_zeppelin_pr.sh [-f] " + echo " -f force overwrite of local branch (default: fail if exists)" + exit 1 +} + +if [[ ${#} -eq 0 ]]; then + usage +fi + +FORCE="" +while getopts ":f" arg; do + case "${arg}" in + f) + FORCE="--force" + ;; + ?) + usage + ;; + esac +done +shift "$(($OPTIND -1))" + +PR_NUM=$1 + +git fetch ${APACHE_ZEPPELIN_REMOTE_REPO_NAME} pull/${PR_NUM}/head:PR_${PR_NUM} ${FORCE} +git checkout PR_${PR_NUM} diff --git a/dev/common_release.sh b/dev/common_release.sh index fda1700398a..0d39a589318 100644 --- a/dev/common_release.sh +++ b/dev/common_release.sh @@ -20,11 +20,18 @@ # common fucntions if [[ -z "${TAR}" ]]; then - TAR="/usr/bin/tar" + TAR="tar" + if [ "$(uname -s)" = "Darwin" ]; then + export COPYFILE_DISABLE=1 + TAR="tar --no-mac-metadata --no-xattrs --no-fflags" + fi fi if [[ -z "${SHASUM}" ]]; then - SHASUM="/usr/bin/shasum" + SHASUM="sha512sum" + if [ "$(uname -s)" = "Darwin" ]; then + SHASUM="shasum -a 512" + fi fi if [[ -z "${WORKING_DIR}" ]]; then diff --git a/dev/create_release.sh b/dev/create_release.sh index 536f3a5b96c..b5529169f6c 100755 --- a/dev/create_release.sh +++ b/dev/create_release.sh @@ -42,7 +42,6 @@ done RELEASE_VERSION="$1" GIT_TAG="$2" -SCALA_VERSION="2.11" function make_source_package() { # create source package @@ -65,16 +64,16 @@ function make_binary_release() { cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" - echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}" - mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS} + echo "./mvnw clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}" + ./mvnw clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS} if [[ $? -ne 0 ]]; then echo "Build failed. 
${BUILD_FLAGS}" exit 1 fi # re-create package with proper dir name with binary license - cd zeppelin-distribution/target/zeppelin-* - mv zeppelin-* "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" + cd zeppelin-distribution/target/zeppelin-${RELEASE_VERSION}-bin + mv zeppelin-${RELEASE_VERSION}-bin "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}" cat ../../src/bin_license/LICENSE >> "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/LICENSE" cat ../../src/bin_license/NOTICE >> "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/NOTICE" cp ../../src/bin_license/licenses/* "zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}/licenses/" @@ -97,8 +96,10 @@ function make_binary_release() { git_clone make_source_package -make_binary_release netinst "-Pweb-angular -Phadoop-2.6 -pl !beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb,!ksql,!scalding -am" -make_binary_release all "-Pweb-angular -Phadoop-2.6" + +make_binary_release netinst "-Pweb-classic -pl !hbase,!jdbc,!file,!flink,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!java,!neo4j,!sparql,!mongodb,!shell -am" + +make_binary_release all "-Pweb-classic -pl !shell" # remove non release files and dirs rm -rf "${WORKING_DIR}/zeppelin" diff --git a/dev/publish_release.sh b/dev/publish_release.sh index b389fd794e1..1b26253ca6c 100755 --- a/dev/publish_release.sh +++ b/dev/publish_release.sh @@ -46,7 +46,7 @@ if [[ $RELEASE_VERSION == *"SNAPSHOT"* ]]; then DO_SNAPSHOT="yes" fi -PUBLISH_PROFILES="-Ppublish-distr -Phadoop-2.6 -Pweb-angular" +PUBLISH_PROFILES="-Ppublish-distr -Pweb-classic" PROJECT_OPTIONS="-pl !zeppelin-distribution -Dmaven.javadoc.skip=true" NEXUS_STAGING="https://repository.apache.org/service/local/staging" NEXUS_PROFILE="153446d1ac37c4" @@ -84,20 +84,16 @@ function publish_snapshot_to_maven() { tmp_repo="$(mktemp -d /tmp/zeppelin-repo-XXXXX)" - mvn versions:set -DnewVersion=$RELEASE_VERSION + ./mvnw versions:set -DnewVersion=$RELEASE_VERSION tmp_settings="tmp-settings.xml" echo "" > $tmp_settings echo "apache.snapshots.https$ASF_USERID" >> $tmp_settings echo "$ASF_PASSWORD" >> $tmp_settings echo "" >> $tmp_settings - mvn --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \ + ./mvnw --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -DskipTests \ $PUBLISH_PROFILES -Drat.skip=true deploy - "${BASEDIR}/change_scala_version.sh" 2.11 - mvn -Pscala-2.11 --settings $tmp_settings -Dmaven.repo.local="${tmp_repo}" -Pbeam -DskipTests \ - $PUBLISH_PROFILES -Drat.skip=true clean deploy - rm $tmp_settings rm -rf $tmp_repo } @@ -106,7 +102,7 @@ function publish_to_maven() { cd "${WORKING_DIR}/zeppelin" # Force release version - mvn versions:set -DnewVersion="${RELEASE_VERSION}" + ./mvnw versions:set -DnewVersion="${RELEASE_VERSION}" # Using Nexus API documented here: # https://support.sonatype.com/hc/en-us/articles/213465868-Uploading-to-a-Staging-Repository-via-REST-API @@ -128,9 +124,9 @@ function publish_to_maven() { rm -rf $HOME/.m2/repository/org/apache/zeppelin # build with scala-2.11 - echo "mvn clean install -DskipTests \ + echo "./mvnw clean install -DskipTests \ ${PUBLISH_PROFILES} ${PROJECT_OPTIONS}" - mvn clean install -DskipTests \ + ./mvnw clean install -DskipTests \ ${PUBLISH_PROFILES} ${PROJECT_OPTIONS} if [[ $? -ne 0 ]]; then echo "Build failed." 
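As a quick usage sketch of the new `dev/checkout_zeppelin_pr.sh` helper introduced above (the PR number below is a placeholder; the remote name follows the script's own documented prerequisite):

```bash
# Illustrative usage of dev/checkout_zeppelin_pr.sh; 4321 is a placeholder PR number.
# One-time prerequisite, as documented in the script header:
git remote add apache git@github.com:apache/zeppelin.git

# Create and check out a local branch PR_4321 from pull request 4321,
# overwriting the branch if it already exists (-f):
dev/checkout_zeppelin_pr.sh -f 4321
```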
diff --git a/dev/test_zeppelin_pr.py b/dev/test_zeppelin_pr.py deleted file mode 100755 index 22602d0dd6d..00000000000 --- a/dev/test_zeppelin_pr.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -# This utility creates a local branch from specified pullrequest, to help the test and review -# You'll need to run this utility from master branch with command -# -# dev/test_zeppelin_pr.py [#PR] -# -# then pr[#PR] branch will be created. -# - -from __future__ import print_function -import sys, os, subprocess, json, codecs - -if sys.version_info[0] == 2: - from urllib import urlopen -else: - from urllib.request import urlopen - -if len(sys.argv) == 1: - print("usage) " + sys.argv[0] + " [#PR]") - print(" eg) " + sys.argv[0] + " 122") - sys.exit(1) - - -pr=sys.argv[1] -githubApi="https://api.github.com/repos/apache/zeppelin" - -reader = codecs.getreader("utf-8") -prInfo = json.load(reader(urlopen(githubApi + "/pulls/" + pr))) -if "message" in prInfo and prInfo["message"] == "Not Found": - sys.stderr.write("PullRequest #" + pr + " not found\n") - sys.exit(1) - -prUser=prInfo['user']['login'] -prRepoUrl=prInfo['head']['repo']['clone_url'] -prBranch=prInfo['head']['label'].replace(":", "/") -print(prBranch) - -# create local branch -exitCode = os.system("git checkout -b pr" + pr) -if exitCode != 0: - sys.exit(1) - -# add remote repository and fetch -exitCode = os.system("git remote remove " + prUser) -exitCode = os.system("git remote add " + prUser + " " + prRepoUrl) -if exitCode != 0: - sys.stderr.write("Can not add remote repository.\n") - sys.exit(1) - -exitCode = os.system("git fetch " + prUser) -if exitCode != 0: - sys.stderr.write("Can't fetch remote repository.\n") - sys.exit(1) - - -currentBranch = subprocess.check_output("git rev-parse --abbrev-ref HEAD", shell=True).rstrip().decode("utf-8") - -print("Merge branch " + prBranch + " into " + currentBranch) - -rev = subprocess.check_output("git rev-parse " + prBranch, shell=True).rstrip().decode("utf-8") -prAuthor = subprocess.check_output("git --no-pager show -s --format=\"%an <%ae>\" " + rev, shell=True).rstrip().decode("utf-8") -prAuthorDate = subprocess.check_output("git --no-pager show -s --format=\"%ad\" " + rev, shell=True).rstrip().decode("utf-8") - -prTitle = prInfo['title'] -prBody = prInfo['body'] - -commitList = subprocess.check_output("git log --pretty=format:\"%h\" " + currentBranch + ".." 
+ prBranch, shell=True).rstrip().decode("utf-8") -authorList = [] -for commitHash in commitList.split("\n"): - a = subprocess.check_output("git show -s --pretty=format:\"%an <%ae>\" "+commitHash, shell=True).rstrip().decode("utf-8") - if a not in authorList: - authorList.append(a) - -commitMsg = prTitle + "\n" -if prBody : - commitMsg += prBody + "\n\n" -for author in authorList: - commitMsg += "Author: " + author +"\n" -commitMsg += "\n" -commitMsg += "Closes #" + pr + " from " + prBranch + " and squashes the following commits:\n\n" -commitMsg += subprocess.check_output("git log --pretty=format:\"%h [%an] %s\" " + currentBranch + ".." + prBranch, shell=True).rstrip().decode("utf-8") - -exitCode = os.system("git merge --no-commit --squash " + prBranch) -if exitCode != 0: - sys.stderr.write("Can not merge\n") - sys.exit(1) - -exitCode = os.system('git commit -a --author "' + prAuthor + '" --date "' + prAuthorDate + '" -m"' + commitMsg + '"') -if exitCode != 0: - sys.stderr.write("Commit failed\n") - sys.exit(1) - -os.system("git remote remove " + prUser) -print("Branch " + prBranch + " is merged into " + currentBranch) diff --git a/docs/README.md b/docs/README.md index 7ca822edbdc..c9646e12957 100644 --- a/docs/README.md +++ b/docs/README.md @@ -42,7 +42,7 @@ bundle exec jekyll serve --watch **Run locally using docker** ``` -docker run --rm -it \ +docker run --rm -it \ -v $PWD:/docs \ -w /docs \ -p '4000:4000' \ diff --git a/docs/_config.yml b/docs/_config.yml index f1f3bb0f545..af2eaf85b84 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -21,7 +21,7 @@ author : twitter : ASF feedburner : feedname -ZEPPELIN_VERSION : 0.10.0-SNAPSHOT +ZEPPELIN_VERSION : 0.12.0-SNAPSHOT # The production_url is only used when full-domain names are needed # such as sitemap.txt @@ -59,7 +59,7 @@ JB : # - Only the following values are falsy: ["", null, false] # - When setting BASE_PATH it must be a valid url. # This means always setting the protocol (http|https) or prefixing with "/" - BASE_PATH : /docs/0.10.0-SNAPSHOT + BASE_PATH : /docs/0.12.0-SNAPSHOT # By default, the asset_path is automatically defined relative to BASE_PATH plus the enabled theme. # ex: [BASE_PATH]/assets/themes/[THEME-NAME] diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index 8bbf6b0034a..9305359fdfb 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -34,8 +34,10 @@
   Yarn
   Spark with Zeppelin
+  Flink with Zeppelin
   SQL with Zeppelin
   Python with Zeppelin
+  R with Zeppelin
@@ -112,12 +114,13 @@
   Git Storage
   S3 Storage
   Azure Storage
+  Google Cloud Storage
   OSS Storage
-  ZeppelinHub Storage
   MongoDB Storage
   Operation
   Configuration
+  Monitoring
   Proxy Setting
   Upgrading
   Trouble Shooting
@@ -131,43 +134,30 @@
   Overview
   Spark
+  Flink
   JDBC
   Python
   R
   Alluxio
-  Beam
   BigQuery
   Cassandra
   Elasticsearch
-  Flink
-  Geode
   Groovy
-  Hazelcast Jet
   HBase
   HDFS
   Hive
-  Ignite
   influxDB
   Java
   Jupyter
-  Kotlin
-  KSQL
-  Kylin
-  Lens
   Livy
   Mahout
   Markdown
   MongoDB
   Neo4j
-  Pig
   Postgresql, HAWQ
-  SAP
-  Scalding
-  Scio
   Shell
   Sparql
-  Submarine
@@ -190,9 +180,9 @@
   How to Contribute (website)
   External Resources
-  Mailing List
-  Apache Zeppelin Wiki
-  Stackoverflow Questions about Zeppelin
+  Mailing List
+  Apache Zeppelin Wiki
+  Stackoverflow Questions about Zeppelin
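To eyeball these navigation changes locally, the docs site can be served with Jekyll as described in docs/README.md earlier in this patch; port 4000 is the default mapped there, and the bundler/Jekyll setup is assumed to already be in place:

```bash
# Serve the documentation locally and check the updated navigation menu
# (assumes the Ruby/bundler prerequisites from docs/README.md are installed).
cd docs
bundle exec jekyll serve --watch
# then browse http://localhost:4000 and open the navigation bar
```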
  • diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif index 3c827f4b6ec..dd4d1daf9be 100644 Binary files a/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif and b/docs/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png b/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png new file mode 100644 index 00000000000..6a2a6e9332e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_architecture.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif new file mode 100644 index 00000000000..aa53c5bab6f Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_docker_tutorial.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png b/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png new file mode 100644 index 00000000000..6b6dcda493a Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_scala_codecompletion.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png new file mode 100644 index 00000000000..6d866ac0d62 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_comment.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png new file mode 100644 index 00000000000..9f8e2f44daf Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_jobname.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png new file mode 100644 index 00000000000..5eaa4acf44e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_multiple_insert.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png new file mode 100644 index 00000000000..260686c8a0f Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_sql_parallelism.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png b/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png new file mode 100644 index 00000000000..4b1168b88cb Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_streaming_wordcount.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png b/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png new file mode 100644 index 00000000000..c5431b4d90e Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/flink_udf_jars.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif b/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif index fe7e2e92923..29e38200644 100644 Binary files a/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif and b/docs/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png 
deleted file mode 100644 index 9f6d5ab3fd0..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png deleted file mode 100644 index feec0ccf396..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png deleted file mode 100644 index 97a63e80967..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-logo.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png deleted file mode 100644 index ffa1c260bf3..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png b/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png deleted file mode 100644 index 9f43bd29285..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif b/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif new file mode 100644 index 00000000000..e891ceb5af1 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/jdbc_refresh.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png b/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png new file mode 100644 index 00000000000..960e6b0e48d Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/python_pandas_sql.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png b/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png new file mode 100644 index 00000000000..ce654f39b35 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/python_zshow_df.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif b/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif new file mode 100644 index 00000000000..21c243241e0 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/r_shiny_app.gif differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png b/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png deleted file mode 100644 index f925d47c17e..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME16.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png b/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png deleted file mode 100644 index 0eaa063d608..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_SPARK_HOME24.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png b/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png deleted file mode 100644 index 8a867ccecb4..00000000000 Binary files a/docs/assets/themes/zeppelin/img/docs-img/spark_deprecate.png and /dev/null differ diff --git a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png b/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png deleted file mode 100644 index 
b90b982e1d8..00000000000 Binary files a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png and /dev/null differ diff --git a/docs/development/contribution/how_to_contribute_code.md b/docs/development/contribution/how_to_contribute_code.md index 3683428f985..32543c0797a 100644 --- a/docs/development/contribution/how_to_contribute_code.md +++ b/docs/development/contribution/how_to_contribute_code.md @@ -70,19 +70,19 @@ Before making a pull request, please take a look [Contribution Guidelines](http: ### Build ```bash -mvn install +./mvnw install ``` To skip test ```bash -mvn install -DskipTests +./mvnw install -DskipTests ``` To build with specific spark / hadoop version ```bash -mvn install -Dspark.version=x.x.x -Dhadoop.version=x.x.x +./mvnw install -Dspark.version=x.x.x -Dhadoop.version=x.x.x ``` For the further @@ -97,7 +97,7 @@ For the further ```bash cd zeppelin-server HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME \ -mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args="" +./mvnw exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args="" ``` #### Option 2 - Daemon Script @@ -105,7 +105,7 @@ mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexe > **Note:** Make sure you first run ```bash -mvn clean install -DskipTests +./mvnw clean install -DskipTests ``` in your zeppelin root directory, otherwise your server build will fail to find the required dependencies in the local repro. @@ -137,17 +137,17 @@ cd /zeppelin-interpreter/src/main/thrift ### Run Selenium test -Zeppelin has [set of integration tests](https://github.com/apache/zeppelin/tree/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration) using Selenium. To run these test, first build and run Zeppelin and make sure Zeppelin is running on port 8080. Then you can run test using following command +Zeppelin has [set of integration tests](https://github.com/apache/zeppelin/tree/master/zeppelin-integration/src/test/java/org/apache/zeppelin/integration) using Selenium. To run these test, first build and run Zeppelin and make sure Zeppelin is running on port 8080. 
Then you can run test using following command ```bash -TEST_SELENIUM=true mvn test -Dtest=[TEST_NAME] -DfailIfNoTests=false \ +TEST_SELENIUM=true ./mvnw test -Dtest=[TEST_NAME] -DfailIfNoTests=false \ -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' ``` -For example, to run [ParagraphActionIT](https://github.com/apache/zeppelin/blob/master/zeppelin-server/src/test/java/org/apache/zeppelin/integration/ParagraphActionsIT.java), +For example, to run [ParagraphActionIT](https://github.com/apache/zeppelin/blob/master/zeppelin-integration/src/test/java/org/apache/zeppelin/integration/ParagraphActionsIT.java), ```bash -TEST_SELENIUM=true mvn test -Dtest=ParagraphActionsIT -DfailIfNoTests=false \ +TEST_SELENIUM=true ./mvnw test -Dtest=ParagraphActionsIT -DfailIfNoTests=false \ -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' ``` diff --git a/docs/development/contribution/useful_developer_tools.md b/docs/development/contribution/useful_developer_tools.md index 17ca40307f5..47f3a84dc61 100644 --- a/docs/development/contribution/useful_developer_tools.md +++ b/docs/development/contribution/useful_developer_tools.md @@ -61,28 +61,27 @@ you can use this function like `setjdk 1.8` / `setjdk 1.7` ```bash # build `zeppelin-web` only -mvn clean -pl 'zeppelin-web' package -DskipTests; +./mvnw clean -pl 'zeppelin-web' package -DskipTests; # build `zeppelin-server` and its dependencies only -mvn clean package -pl 'spark,spark-dependencies,python,markdown,zeppelin-server' --am -DskipTests +./mvnw clean package -pl 'spark,spark-dependencies,python,markdown,zeppelin-server' --am -DskipTests -# build spark related modules with default profiles: scala 2.10 -mvn clean package -pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests +# build spark related modules with default profiles +./mvnw clean package -pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests -# build spark related modules with profiles: scala 2.11, spark 2.1 hadoop 2.7 -./dev/change_scala_version.sh 2.11 -mvn clean package -Pspark-2.1 -Phadoop-2.7 -Pscala-2.11 \ +# build spark related modules with profiles: scala 2.13, spark 3.5 hadoop 3.3 +./mvnw clean package -Pspark-scala-2.13 -Pspark-3.5 -Phadoop-3.3 \ -pl 'spark,spark-dependencies,zeppelin-server' --am -DskipTests # build `zeppelin-server` and `markdown` with dependencies -mvn clean package -pl 'markdown,zeppelin-server' --am -DskipTests +./mvnw clean package -pl 'markdown,zeppelin-server' --am -DskipTests ``` ### Running Individual Tests ```bash # run the `HeliumBundleFactoryTest` test class -mvn test -pl 'zeppelin-server' --am -DfailIfNoTests=false -Dtest=HeliumBundleFactoryTest +./mvnw test -pl 'zeppelin-server' --am -DfailIfNoTests=false -Dtest=HeliumBundleFactoryTest ``` ### Running Selenium Tests @@ -91,12 +90,12 @@ Make sure that Zeppelin instance is started to execute integration tests (= sele ```bash # run the `SparkParagraphIT` test class -TEST_SELENIUM="true" mvn test -pl 'zeppelin-server' --am \ +TEST_SELENIUM="true" ./mvnw test -pl 'zeppelin-server' --am \ -DfailIfNoTests=false -Dtest=SparkParagraphIT # run the `testSqlSpark` test function only in the `SparkParagraphIT` class # but note that, some test might be dependent on the previous tests -TEST_SELENIUM="true" mvn test -pl 'zeppelin-server' --am \ +TEST_SELENIUM="true" ./mvnw test -pl 'zeppelin-server' --am \ -DfailIfNoTests=false -Dtest=SparkParagraphIT#testSqlSpark ``` diff --git a/docs/development/helium/overview.md b/docs/development/helium/overview.md index 08a401af462..92da7e8a932 
100644 --- a/docs/development/helium/overview.md +++ b/docs/development/helium/overview.md @@ -40,4 +40,4 @@ Currently, Helium supports 4 types of package. ## Configuration Zeppelin ships with several builtin helium plugins which is located in $ZEPPELIN_HOME/heliums. If you want to try more types of heliums plugins, -you can configure `zeppelin.helium.registry` to be `helium,https://s3.amazonaws.com/helium-package/helium.json` in zeppelin-site.xml. `https://s3.amazonaws.com/helium-package/helium.json` will be updated regularly. +you can configure `zeppelin.helium.registry` to be `helium,https://zeppelin.apache.org/helium.json` in zeppelin-site.xml. `https://zeppelin.apache.org/helium.json` will be updated regularly. diff --git a/docs/development/writing_zeppelin_interpreter.md b/docs/development/writing_zeppelin_interpreter.md index 33ecee1631e..fa4970a293c 100644 --- a/docs/development/writing_zeppelin_interpreter.md +++ b/docs/development/writing_zeppelin_interpreter.md @@ -236,7 +236,7 @@ To configure your interpreter you need to follow these steps: 2. In the interpreter page, click the `+Create` button and configure your interpreter properties. Now you are done and ready to use your interpreter. -> **Note :** Interpreters released with zeppelin have a [default configuration](https://github.com/apache/zeppelin/blob/master/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java#L397) which is used when there is no `conf/zeppelin-site.xml`. +> **Note :** Interpreters released with zeppelin have a [default configuration](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java#L928) which is used when there is no `conf/zeppelin-site.xml`. ## Use your interpreter diff --git a/docs/index.md b/docs/index.md index f1cf29e5138..75d362ca868 100644 --- a/docs/index.md +++ b/docs/index.md @@ -111,7 +111,6 @@ limitations under the License. * [S3 Storage](./setup/storage/storage.html#notebook-storage-in-s3) * [Azure Storage](./setup/storage/storage.html#notebook-storage-in-azure) * [Google Cloud Storage](./setup/storage/storage.html#notebook-storage-in-gcs) - * [ZeppelinHub Storage](./setup/storage/storage.html#notebook-storage-in-zeppelinhub) * [MongoDB Storage](./setup/storage/storage.html#notebook-storage-in-mongodb) * Operation * [Configuration](./setup/operation/configuration.html): lists for Apache Zeppelin @@ -136,42 +135,29 @@ limitations under the License. 
#### Available Interpreters * [Alluxio](./interpreter/alluxio.html) - * [Beam](./interpreter/beam.html) * [BigQuery](./interpreter/bigquery.html) * [Cassandra](./interpreter/cassandra.html) * [Elasticsearch](./interpreter/elasticsearch.html) * [Flink](./interpreter/flink.html) - * [Geode](./interpreter/geode.html) * [Groovy](./interpreter/groovy.html) - * [Hazelcast Jet](./interpreter/hazelcastjet.html) * [HBase](./interpreter/hbase.html) * [HDFS](./interpreter/hdfs.html) * [Hive](./interpreter/hive.html) - * [Ignite](./interpreter/ignite.html) * [influxDB](./interpreter/influxdb.html) * [Java](./interpreter/java.html) * [JDBC](./interpreter/jdbc.html) * [Jupyter](./interpreter/jupyter.html) - * [Kotlin](./interpreter/kotlin.html) - * [KSQL](./interpreter/ksql.html) - * [Kylin](./interpreter/kylin.html) - * [Lens](./interpreter/lens.html) * [Livy](./interpreter/livy.html) * [Mahout](./interpreter/mahout.html) * [Markdown](./interpreter/markdown.html) * [MongoDB](./interpreter/mongodb.html) * [Neo4j](./interpreter/neo4j.html) - * [Pig](./interpreter/pig.html) * [Postgresql, HAWQ](./interpreter/postgresql.html) * [Python](./interpreter/python.html) * [R](./interpreter/r.html) - * [SAP](./interpreter/sap.html) - * [Scalding](./interpreter/scalding.html) - * [Scio](./interpreter/scio.html) * [Shell](./interpreter/shell.html) * [Spark](./interpreter/spark.html) * [Sparql](./interpreter/sparql.html) - * [Submarine](./interpreter/submarine.html) #### External Resources * [Mailing List](https://zeppelin.apache.org/community.html) diff --git a/docs/interpreter/beam.md b/docs/interpreter/beam.md deleted file mode 100644 index d992b8ee5b5..00000000000 --- a/docs/interpreter/beam.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -layout: page -title: Beam interpreter in Apache Zeppelin -description: Apache Beam is an open source, unified programming model that you can use to create a data processing pipeline. -group: interpreter ---- - - -{% include JB/setup %} - -# Beam interpreter for Apache Zeppelin - -
    - -## Overview -[Apache Beam](http://beam.incubator.apache.org) is an open source unified platform for data processing pipelines. A pipeline can be build using one of the Beam SDKs. -The execution of the pipeline is done by different Runners. Currently, Beam supports Apache Flink Runner, Apache Spark Runner, and Google Dataflow Runner. - -## How to use -Basically, you can write normal Beam java code where you can determine the Runner. You should write the main method inside a class becuase the interpreter invoke this main to execute the pipeline. Unlike Zeppelin normal pattern, each paragraph is considered as a separate job, there isn't any relation to any other paragraph. - -The following is a demonstration of a word count example with data represented in array of strings -But it can read data from files by replacing `Create.of(SENTENCES).withCoder(StringUtf8Coder.of())` with `TextIO.Read.from("path/to/filename.txt")` - -```java -%beam - -// most used imports -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.transforms.Create; -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import java.util.ArrayList; -import org.apache.beam.runners.direct.*; -import org.apache.beam.sdk.runners.*; -import org.apache.beam.sdk.options.*; -import org.apache.beam.runners.flink.*; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.options.PipelineOptions; - -public class MinimalWordCount { - static List s = new ArrayList<>(); - - static final String[] SENTENCES_ARRAY = new String[] { - "Hadoop is the Elephant King!", - "A yellow and elegant thing.", - "He never forgets", - "Useful data, or lets", - "An extraneous element cling!", - "A wonderful king is Hadoop.", - "The elephant plays well with Sqoop.", - "But what helps him to thrive", - "Are Impala, and Hive,", - "And HDFS in the group.", - "Hadoop is an elegant fellow.", - "An elephant gentle and mellow.", - "He never gets mad,", - "Or does anything bad,", - "Because, at his core, he is yellow", - }; - static final List SENTENCES = Arrays.asList(SENTENCES_ARRAY); - public static void main(String[] args) { - PipelineOptions options = PipelineOptionsFactory.create().as(PipelineOptions.class); - options.setRunner(FlinkRunner.class); - Pipeline p = Pipeline.create(options); - p.apply(Create.of(SENTENCES).withCoder(StringUtf8Coder.of())) - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split("[^a-zA-Z']+")) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - .apply(Count. 
perElement()) - .apply("FormatResults", ParDo.of(new DoFn, String>() { - @ProcessElement - public void processElement(DoFn, String>.ProcessContext arg0) - throws Exception { - s.add("\n" + arg0.element().getKey() + "\t" + arg0.element().getValue()); - } - })); - p.run(); - System.out.println("%table word\tcount"); - for (int i = 0; i < s.size(); i++) { - System.out.print(s.get(i)); - } - - } -} - -``` - diff --git a/docs/interpreter/bigquery.md b/docs/interpreter/bigquery.md index cdac762f6db..da696a74f2e 100644 --- a/docs/interpreter/bigquery.md +++ b/docs/interpreter/bigquery.md @@ -53,6 +53,11 @@ limitations under the License. BigQuery SQL dialect (standardSQL or legacySQL). If empty, [query prefix](https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql#sql-prefix) like '#standardSQL' can be used. + + zeppelin.bigquery.region + + BigQuery dataset region (Needed for single region dataset) + @@ -68,7 +73,7 @@ In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon a Within Google Cloud Platform (e.g. Google App Engine, Google Compute Engine), built-in credentials are used by default. -Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/storage/storage.html#notebook-storage-in-gcs) +Outside of GCP, follow the Google API authentication instructions for [Zeppelin Google Cloud Storage](https://zeppelin.apache.org/docs/latest/setup/storage/storage.html#notebook-storage-in-google-cloud-storage) ## Using the BigQuery Interpreter diff --git a/docs/interpreter/cassandra.md b/docs/interpreter/cassandra.md index 0de7b51b1f8..a49ae7e2421 100644 --- a/docs/interpreter/cassandra.md +++ b/docs/interpreter/cassandra.md @@ -163,7 +163,7 @@ The complete list of all CQL statements and versions can be found below: 3.x - https://docs.datastax.com/en/archived/cql/3.3/cql/cqlIntro.html @@ -172,7 +172,7 @@ The complete list of all CQL statements and versions can be found below: 2.2 - https://docs.datastax.com/en/archived/cql/3.3/cql/cqlIntro.html @@ -181,7 +181,7 @@ The complete list of all CQL statements and versions can be found below: 2.1 - http://docs.datastax.com/en/cql/3.1/cql/cql_intro_c.html diff --git a/docs/interpreter/flink.md b/docs/interpreter/flink.md index 01ea99e5af7..df272cbdb9b 100644 --- a/docs/interpreter/flink.md +++ b/docs/interpreter/flink.md @@ -24,9 +24,10 @@ limitations under the License.
    ## Overview -[Apache Flink](https://flink.apache.org) is an open source platform for distributed stream and batch data processing. Flink’s core is a streaming dataflow engine that provides data distribution, communication, and fault tolerance for distributed computations over data streams. Flink also builds batch processing on top of the streaming engine, overlaying native iteration support, managed memory, and program optimization. +[Apache Flink](https://flink.apache.org) is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams. +Flink has been designed to run in all common cluster environments, perform computations at in-memory speed and at any scale. -In Zeppelin 0.9, we refactor the Flink interpreter in Zeppelin to support the latest version of Flink. **Only Flink 1.10+ is supported, old versions of flink won't work.** +In Zeppelin 0.9, we refactor the Flink interpreter in Zeppelin to support the latest version of Flink. **Currently, only Flink 1.15+ is supported, old versions of flink won't work.** Apache Flink is supported in Zeppelin with the Flink interpreter group which consists of the five interpreters listed below. @@ -62,13 +63,111 @@ Apache Flink is supported in Zeppelin with the Flink interpreter group which con
+## Main Features
+
+| Feature | Description |
+| ------- | ----------- |
+| Support multiple versions of Flink | You can run different versions of Flink in one Zeppelin instance |
+| Support multiple languages | Scala, Python, SQL are supported, and you can also collaborate across languages, e.g. write a Scala UDF and use it in PyFlink |
+| Support multiple execution modes | Local \| Remote \| Yarn \| Yarn Application |
+| Support Hive | Hive catalog is supported |
+| Interactive development | Interactive development user experience increases your productivity |
+| Enhancement on Flink SQL | Support both streaming sql and batch sql in one notebook; support sql comments (single line and multi-line); support advanced configuration (jobName, parallelism); support multiple insert statements |
+| Multi-tenancy | Multiple users can work in one Zeppelin instance without affecting each other |
+| Rest API Support | You can not only submit Flink jobs via the Zeppelin notebook UI, but also via its rest api (you can use Zeppelin as a Flink job server) |
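As a hedged illustration of the "Rest API Support" row above: a paragraph containing Flink code can be triggered through Zeppelin's notebook REST API. The note and paragraph IDs below are placeholders, and the endpoint shape should be checked against the REST API documentation of your Zeppelin version:

```bash
# Sketch only: run one paragraph of a note that holds Flink code via the REST API.
# 2ABCDEFGH and paragraph_1234 are placeholder IDs; localhost:8080 is the default server address.
curl -X POST http://localhost:8080/api/notebook/job/2ABCDEFGH/paragraph_1234
```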
    + +## Play Flink in Zeppelin docker + +For beginner, we would suggest you to play Flink in Zeppelin docker. +First you need to download Flink, because there's no Flink binary distribution shipped with Zeppelin. +e.g. Here we download Flink 1.12.2 to`/mnt/disk1/flink-1.12.2`, +and we mount it to Zeppelin docker container and run the following command to start Zeppelin docker. + +```bash +docker run -u $(id -u) -p 8080:8080 -p 8081:8081 --rm -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink --name zeppelin apache/zeppelin:0.10.0 +``` + +After running the above command, you can open `http://localhost:8080` to play Flink in Zeppelin. We only verify the flink local mode in Zeppelin docker, other modes may not due to network issues. +`-p 8081:8081` is to expose Flink web ui, so that you can access Flink web ui via `http://localhost:8081`. + +Here's screenshot of running note `Flink Tutorial/5. Streaming Data Analytics` + + + + +You can also mount notebook folder to replace the built-in zeppelin tutorial notebook. +e.g. Here's a repo of Flink sql cookbook on Zeppelin: [https://github.com/zjffdu/flink-sql-cookbook-on-zeppelin/](https://github.com/zjffdu/flink-sql-cookbook-on-zeppelin/) + +You can clone this repo and mount it to docker, + +``` +docker run -u $(id -u) -p 8080:8080 --rm -v /mnt/disk1/flink-sql-cookbook-on-zeppelin:/notebook -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink -e ZEPPELIN_NOTEBOOK_DIR='/notebook' --name zeppelin apache/zeppelin:0.10.0 +``` + ## Prerequisites -* Download Flink 1.10 for scala 2.11 (Only scala-2.11 is supported, scala-2.12 is not supported yet in Zeppelin) +Download Flink 1.15 or afterwards (Only Scala 2.12 is supported) + +### Version-specific notes for Flink + +Flink 1.15 is scala free and has changed its binary distribution, the following extra steps is required. +* Move FLINK_HOME/opt/flink-table-planner_2.12-1.15.0.jar to FLINK_HOME/lib +* Move FLINK_HOME/lib/flink-table-planner-loader-1.15.0.jar to FLINK_HOME/opt +* Download flink-table-api-scala-bridge_2.12-1.15.0.jar and flink-table-api-scala_2.12-1.15.0.jar to FLINK_HOME/lib + +Flink 1.16 introduces new `ClientResourceManager` for sql client, you need to move `FLINK_HOME/opt/flink-sql-client-1.16.0.jar` to `FLINK_HOME/lib` + +## Flink on Zeppelin Architecture + + + +The above diagram is the architecture of Flink on Zeppelin. Flink interpreter on the left side is actually a Flink client +which is responsible for compiling and managing Flink job lifecycle, such as submit, cancel job, +monitoring job progress and so on. The Flink cluster on the right side is the place where executing Flink job. +It could be a MiniCluster (local mode), Standalone cluster (remote mode), +Yarn session cluster (yarn mode) or Yarn application session cluster (yarn-application mode) + +There are 2 important components in Flink interpreter: Scala shell & Python shell + +* Scala shell is the entry point of Flink interpreter, it would create all the entry points of Flink program, such as ExecutionEnvironment,StreamExecutionEnvironment and TableEnvironment. Scala shell is responsible for compiling and running Scala code and sql. +* Python shell is the entry point of PyFlink, it is responsible for compiling and running Python code. ## Configuration + The Flink interpreter can be configured with properties provided by Zeppelin (as following table). -You can also add and set other flink properties which are not listed in the table. 
For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-master/ops/config.html). +You can also add and set other Flink properties which are not listed in the table. For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-master/ops/config.html). @@ -78,7 +177,7 @@ You can also add and set other flink properties which are not listed in the tabl - + @@ -93,7 +192,7 @@ You can also add and set other flink properties which are not listed in the tabl - + @@ -108,12 +207,12 @@ You can also add and set other flink properties which are not listed in the tabl - + - + @@ -138,32 +237,32 @@ You can also add and set other flink properties which are not listed in the tabl - + - + - + - + - + - + @@ -183,14 +282,18 @@ You can also add and set other flink properties which are not listed in the tabl - + - + + + + + + - @@ -198,171 +301,333 @@ You can also add and set other flink properties which are not listed in the tabl - + - + + + + + + - + - - - - - - +
    Property
    `FLINK_HOME` Location of flink installation. It is must be specified, otherwise you can not use flink in ZeppelinLocation of Flink installation. It is must be specified, otherwise you can not use Flink in Zeppelin
    `HADOOP_CONF_DIR`
| Property | Default | Description |
| --- | --- | --- |
| `flink.execution.mode` | local | Execution mode of Flink, e.g. local \| remote \| yarn \| yarn-application |
| `flink.execution.remote.host` | | |
| `jobmanager.memory.process.size` | 1024m | Total memory size of the JobManager, e.g. 1024m. It is an official [Flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/) |
| `taskmanager.memory.process.size` | 1024m | Total memory size of the TaskManager, e.g. 1024m. It is an official [Flink property](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/config/) |
| `taskmanager.numberOfTaskSlots` | | |
| `zeppelin.flink.uiWebUrl` | | User-specified Flink JobManager url. It can be used in remote mode where the Flink cluster is already started, or as a url template, e.g. https://knox-server:8443/gateway/cluster-topo/yarn/proxy/{% raw %}{{applicationId}}{% endraw %}/ where {% raw %}{{applicationId}}{% endraw %} is a placeholder for the yarn app id |
| `zeppelin.flink.run.asLoginUser` | true | Whether to run the Flink job as the Zeppelin login user; only applied when running Flink jobs on hadoop yarn with shiro enabled |
| `flink.udf.jars` | | Flink udf jars (comma separated). Zeppelin registers the udfs in these jars automatically; the jars can be either local files or hdfs files if you have hadoop installed. The udf name is the class name. |
| `flink.udf.jars.packages` | | Packages (comma separated) to search for the udfs defined in `flink.udf.jars`. Specifying this can reduce the number of classes to scan; otherwise all classes in the udf jars are scanned. |
| `flink.execution.jars` | | Additional user jars (comma separated); either local files or hdfs files if you have hadoop installed. Can be used to specify Flink connector jars or udf jars (without the automatic udf class registration of `flink.udf.jars`) |
| `flink.execution.packages` | | Additional user packages (comma separated), e.g. `org.apache.flink:flink-json:1.10.0` |
| `zeppelin.flink.concurrentBatchSql.max` | | |
| `table.exec.resource.default-parallelism` | 1 | Default parallelism for Flink sql jobs |
| `zeppelin.flink.scala.color` | true | Whether to display Scala shell output in colorful format |
| `zeppelin.flink.scala.shell.tmp_dir` | | Temp folder for storing Scala shell compiled jars |
| `zeppelin.flink.enableHive` | false | |
| `zeppelin.flink.hive.version` | 2.3.7 | Hive version that you would like to connect to |
| `zeppelin.flink.module.enableHive` | false | Whether to enable the hive module; hive udfs take precedence over Flink udfs when the hive module is enabled |
| `zeppelin.flink.maxResult` | 1000 | Max number of rows returned by the sql interpreter |
| `zeppelin.flink.job.check_interval` | 1000 | Check interval (in milliseconds) to check Flink job progress |
| `flink.interpreter.close.shutdown_cluster` | true | Whether to shut down the Flink cluster when closing the interpreter |
| `zeppelin.interpreter.close.cancel_job` | true | Whether to cancel the Flink job when closing the interpreter |
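As a rough illustration only (not part of the original table), several of these properties can be set together via an inline `%flink.conf` paragraph of the kind shown later in this page; the mode and memory sizes below are arbitrary example values, not recommended defaults:

```
%flink.conf

flink.execution.mode yarn
jobmanager.memory.process.size 2048m
taskmanager.memory.process.size 4096m
taskmanager.numberOfTaskSlots 4
```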
    -## StreamExecutionEnvironment, ExecutionEnvironment, StreamTableEnvironment, BatchTableEnvironment +## Interpreter Binding Mode -Zeppelin will create 6 variables as flink scala (`%flink`) entry point: +The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same Flink interpreter which means they share the same Flink cluster. +In practice, we would recommend you to use `isolated per note` which means each note has own Flink interpreter without affecting each other (Each one has his own Flink cluster). -* `senv` (StreamExecutionEnvironment), -* `benv` (ExecutionEnvironment) -* `stenv` (StreamTableEnvironment for blink planner) -* `btenv` (BatchTableEnvironment for blink planner) -* `stenv_2` (StreamTableEnvironment for flink planner) -* `btenv_2` (BatchTableEnvironment for flink planner) -And will create 6 variables as pyflink (`%flink.pyflink` or `%flink.ipyflink`) entry point: +## Execution Mode -* `s_env` (StreamExecutionEnvironment), -* `b_env` (ExecutionEnvironment) -* `st_env` (StreamTableEnvironment for blink planner) -* `bt_env` (BatchTableEnvironment for blink planner) -* `st_env_2` (StreamTableEnvironment for flink planner) -* `bt_env_2` (BatchTableEnvironment for flink planner) +Flink in Zeppelin supports 4 execution modes (`flink.execution.mode`): -## Blink/Flink Planner +* Local +* Remote +* Yarn +* Yarn Application -There are 2 planners supported by Flink's table api: `flink` & `blink`. +### Local Mode -* If you want to use DataSet api, and convert it to flink table then please use flink planner (`btenv_2` and `stenv_2`). -* In other cases, we would always recommend you to use `blink` planner. This is also what flink batch/streaming sql interpreter use (`%flink.bsql` & `%flink.ssql`) +Running Flink in local mode will start a MiniCluster in local JVM. By default, the local MiniCluster use port 8081, so make sure this port is available in your machine, +otherwise you can configure `rest.port` to specify another port. You can also specify `local.number-taskmanager` and `flink.tm.slot` to customize the number of TM and number of slots per TM. +Because by default it is only 4 TM with 1 slot in this MiniCluster which may not be enough for some cases. -Check this [page](https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/common.html#main-differences-between-the-two-planners) for the difference between flink planner and blink planner. +### Remote Mode +Running Flink in remote mode will connect to an existing Flink cluster which could be standalone cluster or yarn session cluster. Besides specifying `flink.execution.mode` to be `remote`, you also need to specify +`flink.execution.remote.host` and `flink.execution.remote.port` to point to Flink job manager's rest api address. -## Execution mode (Local/Remote/Yarn/Yarn Application) +### Yarn Mode -Flink in Zeppelin supports 4 execution modes (`flink.execution.mode`): +In order to run Flink in Yarn mode, you need to make the following settings: -* Local -* Remote -* Yarn -* Yarn Application +* Set `flink.execution.mode` to be `yarn` +* Set `HADOOP_CONF_DIR` in Flink's interpreter setting or `zeppelin-env.sh`. +* Make sure `hadoop` command is on your `PATH`. 
Because internally Flink will call command `hadoop classpath` and load all the hadoop related jars in the Flink interpreter process -### Run Flink in Local Mode +In this mode, Zeppelin would launch a Flink yarn session cluster for you and destroy it when you shutdown your Flink interpreter. -Running Flink in Local mode will start a MiniCluster in local JVM. By default, the local MiniCluster will use port 8081, so make sure this port is available in your machine, -otherwise you can configure `rest.port` to specify another port. You can also specify `local.number-taskmanager` and `flink.tm.slot` to customize the number of TM and number of slots per TM, -because by default it is only 4 TM with 1 Slots which may not be enough for some cases. +### Yarn Application Mode -### Run Flink in Remote Mode +In the above yarn mode, there will be a separated Flink interpreter process on the Zeppelin server host. However, this may run out of resources when there are too many interpreter processes. +So in practise, we would recommend you to use yarn application mode if you are using Flink 1.11 or afterwards (yarn application mode is only supported after Flink 1.11). +In this mode Flink interpreter runs in the JobManager which is in yarn container. +In order to run Flink in yarn application mode, you need to make the following settings: -Running Flink in remote mode will connect to an existing flink cluster which could be standalone cluster or yarn session cluster. Besides specifying `flink.execution.mode` to be `remote`. You also need to specify -`flink.execution.remote.host` and `flink.execution.remote.port` to point to flink job manager. +* Set `flink.execution.mode` to be `yarn-application` +* Set `HADOOP_CONF_DIR` in Flink's interpreter setting or `zeppelin-env.sh`. +* Make sure `hadoop` command is on your `PATH`. Because internally flink will call command `hadoop classpath` and load all the hadoop related jars in Flink interpreter process -### Run Flink in Yarn Mode -In order to run flink in Yarn mode, you need to make the following settings: +## Flink Scala -* Set `flink.execution.mode` to `yarn` -* Set `HADOOP_CONF_DIR` in flink's interpreter setting or `zeppelin-env.sh`. -* Make sure `hadoop` command is on your PATH. Because internally flink will call command `hadoop classpath` and load all the hadoop related jars in the flink interpreter process +Scala is the default language of Flink on Zeppelin(`%flink`), and it is also the entry point of Flink interpreter. Underneath Flink interpreter will create Scala shell +which would create several built-in variables, including ExecutionEnvironment,StreamExecutionEnvironment and so on. +So don't create these Flink environment variables again, otherwise you might hit weird issues. The Scala code you write in Zeppelin will be submitted to this Scala shell. +Here are the builtin variables created in Flink Scala shell. -### Run Flink in Yarn Application Mode +* senv (StreamExecutionEnvironment), +* benv (ExecutionEnvironment) +* stenv (StreamTableEnvironment for blink planner (aka. new planner)) +* btenv (BatchTableEnvironment for blink planner (aka. new planner)) +* z (ZeppelinContext) -In the above yarn mode, there will be a separated flink interpreter process. This may run out of resources when there're many interpreter processes. -So it is recommended to use yarn application mode if you are using flink 1.11 or afterwards (yarn application mode is only supported after flink 1.11). 
In this mode flink interpreter runs in the JobManager which is in yarn container. -In order to run flink in yarn application mode, you need to make the following settings: +### Blink/Flink Planner -* Set `flink.execution.mode` to `yarn-application` -* Set `HADOOP_CONF_DIR` in flink's interpreter setting or `zeppelin-env.sh`. -* Make sure `hadoop` command is on your PATH. Because internally flink will call command `hadoop classpath` and load all the hadoop related jars in the flink interpreter process +After Zeppelin 0.11, we remove the support of flink planner (aka. old planner) which is also removed after Flink 1.14. +### Stream WordCount Example -## How to use Hive +You can write whatever Scala code in Zeppelin. -In order to use Hive in Flink, you have to make the following setting. +e.g. in the following example, we write a classical streaming wordcount example. -* Set `zeppelin.flink.enableHive` to be true -* Set `zeppelin.flink.hive.version` to be the hive version you are using. -* Set `HIVE_CONF_DIR` to be the location where `hive-site.xml` is located. Make sure hive metastore is started and you have configured `hive.metastore.uris` in `hive-site.xml` -* Copy the following dependencies to the lib folder of flink installation.  - * flink-connector-hive_2.11–1.10.0.jar - * flink-hadoop-compatibility_2.11–1.10.0.jar - * hive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303–0.9.2.jar and libthrift-0.9.2.jar) + + + +### Code Completion + +You can type tab for code completion. + + + +### ZeppelinContext + +`ZeppelinContext` provides some additional functions and utilities. +See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. +For Flink interpreter, you can use `z` to display Flink `Dataset/Table`. + +e.g. you can use `z.show` to display DataSet, Batch Table, Stream Table. + +* z.show(DataSet) + + + + +* z.show(Batch Table) + + + + +* z.show(Stream Table) + + + + +## Flink SQL + +In Zeppelin, there are 2 kinds of Flink sql interpreter you can use + +* `%flink.ssql` +Streaming Sql interpreter which launch Flink streaming job via `StreamTableEnvironment` +* `%flink.bsql` +Batch Sql interpreter which launch Flink batch job via `BatchTableEnvironment` + +Flink Sql interpreter in Zeppelin is equal to Flink Sql-client + many other enhancement features. + +### Enhancement SQL Features + +#### Support batch SQL and streaming sql together. -## Flink Batch SQL +In Flink Sql-client, either you run streaming sql or run batch sql in one session. You can not run them together. +But in Zeppelin, you can do that. `%flink.ssql` is used for running streaming sql, while `%flink.bsql` is used for running batch sql. +Batch/Streaming Flink jobs run in the same Flink session cluster. -`%flink.bsql` is used for flink's batch sql. You can type `help` to get all the available commands. -It supports all the flink sql, including DML/DDL/DQL. +#### Support multiple statements -* Use `insert into` statement for batch ETL -* Use `select` statement for batch data analytics +You can write multiple sql statements in one paragraph, each sql statement is separated by semicolon. -## Flink Streaming SQL +#### Comment support -`%flink.ssql` is used for flink's streaming sql. You just type `help` to get all the available commands. -It supports all the flink sql, including DML/DDL/DQL. 
+2 kinds of sql comments are supported in Zeppelin: -* Use `insert into` statement for streaming ETL -* Use `select` statement for streaming data analytics +* Single line comment start with `--` +* Multiple line comment around with `/* */` -## Streaming Data Visualization + + + +#### Job parallelism setting + +You can set the sql parallelism via paragraph local property: `parallelism` + + + +#### Support multiple insert + +Sometimes you have multiple insert statements which read the same source, +but write to different sinks. By default, each insert statement would launch a separated Flink job, +but you can set paragraph local property: `runAsOne` to be `true` to run them in one single Flink job. + + + +#### Set job name + +You can set Flink job name for insert statement via setting paragraph local property: `jobName`. To be noticed, +you can only set job name for insert statement. Select statement is not supported yet. +And this kind of setting only works for single insert statement. It doesn't work for multiple insert we mentioned above. + + + +### Streaming Data Visualization + +Zeppelin can visualize the select sql result of Flink streaming job. Overall it supports 3 modes: -Zeppelin supports 3 types of streaming data analytics: * Single * Update * Append -### type=single -Single mode is for the case when the result of sql statement is always one row, such as the following example. The output format is HTML, +#### Single Mode + +Single mode is for the case when the result of sql statement is always one row, +such as the following example. The output format is HTML, and you can specify paragraph local property `template` for the final output content template. -And you can use `{i}` as placeholder for the ith column of result. +You can use `{i}` as placeholder for the `ith` column of result. + + + -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_single_mode.gif) -
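As a sketch of what a single-mode paragraph could look like (not taken from the original tutorial), assuming a hypothetical unbounded table `orders`; `{0}` refers to the first column of the one-row result and is substituted into the HTML `template`:

```sql
%flink.ssql(type=single, template=Total order count: <h1>{0}</h1>)

-- the continuously updated single-row aggregate is rendered with the HTML template above
select count(*) from orders
```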
    +#### Update Mode -### type=update -Update mode is suitable for the case when the output is more than one rows, and always will be updated continuously. +Update mode is suitable for the case when the output is more than one rows, and will always be updated continuously. Here’s one example where we use group by. -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_update_mode.gif) -
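A minimal sketch of an update-mode paragraph, assuming a hypothetical `orders` table with a `status` column:

```sql
%flink.ssql(type=update)

-- the grouped counts are re-emitted to the front end whenever they change
select status, count(*) as cnt
from orders
group by status
```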
    + + +#### Append Mode + +Append mode is suitable for the scenario where output data is always appended. +E.g. the following example which use tumble window. + + + +## PyFlink + +PyFlink is Python entry point of Flink on Zeppelin, internally Flink interpreter will create Python shell which +would create Flink's environment variables (including ExecutionEnvironment, StreamExecutionEnvironment and so on). +To be noticed, the java environment behind Pyflink is created in Scala shell. +That means underneath Scala shell and Python shell share the same environment. +These are variables created in Python shell. + +* `s_env` (StreamExecutionEnvironment), +* `b_env` (ExecutionEnvironment) +* `st_env` (StreamTableEnvironment for blink planner (aka. new planner)) +* `bt_env` (BatchTableEnvironment for blink planner (aka. new planner)) + + +### Configure PyFlink + +There are 3 things you need to configure to make Pyflink work in Zeppelin. + +* Install pyflink + e.g. ( pip install apache-flink==1.11.1 ). + If you need to use Pyflink udf, then you to install pyflink on all the task manager nodes. That means if you are using yarn, then all the yarn nodes need to install pyflink. +* Copy `python` folder under `${FLINK_HOME}/opt` to `${FLINK_HOME/lib`. +* Set `zeppelin.pyflink.python` as the python executable path. By default, it is the python in `PATH`. In case you have multiple versions of python installed, you need to configure `zeppelin.pyflink.python` as the python version you want to use. + +### How to use PyFlink + +There are 2 ways to use PyFlink in Zeppelin + +* `%flink.pyflink` +* `%flink.ipyflink` + +`%flink.pyflink` is much simple and easy, you don't need to do anything except the above setting, +but its function is also limited. We suggest you to use `%flink.ipyflink` which provides almost the same user experience like jupyter. + +### Configure IPyFlink + +If you don't have anaconda installed, then you need to install the following 3 libraries. + +``` +pip install jupyter +pip install grpcio +pip install protobuf +``` + +If you have anaconda installed, then you only need to install following 2 libraries. + +``` +pip install grpcio +pip install protobuf +``` + +`ZeppelinContext` is also available in PyFlink, you can use it almost the same as in Flink Scala. + +Check the [Python doc](python.html) for more features of IPython. + + +## Third party dependencies + +It is very common to have third party dependencies when you write Flink job in whatever languages (Scala, Python, Sql). +It is very easy to add dependencies in IDE (e.g. add dependency in pom.xml), +but how can you do that in Zeppelin ? Mainly there are 2 settings you can use to add third party dependencies + +* flink.execution.packages +* flink.execution.jars + +### flink.execution.packages + +This is the recommended way of adding dependencies. Its implementation is the same as adding +dependencies in `pom.xml`. Underneath it would download all the packages and its transitive dependencies +from maven repository, then put them on the classpath. Here's one example of how to add kafka connector of Flink 1.10 via [inline configuration](../usage/interpreter/overview.html#inline-generic-configuration). + +``` +%flink.conf + +flink.execution.packages org.apache.flink:flink-connector-kafka_2.11:1.10.0,org.apache.flink:flink-connector-kafka-base_2.11:1.10.0,org.apache.flink:flink-json:1.10.0 +``` + +The format is `artifactGroup:artifactId:version`, if you have multiple packages, +then separate them with comma. 
`flink.execution.packages` requires internet access. +So if you cannot access the internet, you need to use `flink.execution.jars` instead. + +### flink.execution.jars + +If your Zeppelin machine cannot access the internet, or your dependencies are not deployed to a maven repository, +then you can use `flink.execution.jars` to specify the jar files you depend on (jar files are separated by comma). + +Here's one example of how to add kafka dependencies (including the kafka connector and its transitive dependencies) via `flink.execution.jars`: + +``` +%flink.conf + +flink.execution.jars /usr/lib/flink-kafka/target/flink-kafka-1.0-SNAPSHOT.jar +``` -### type=append -Append mode is suitable for the scenario where output data is always appended. E.g. the following example which use tumble window. -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_append_mode.gif) -
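A minimal sketch of an append-mode paragraph, assuming a hypothetical `orders` table whose `order_time` column is an event-time attribute:

```sql
%flink.ssql(type=append)

-- each closed 5-minute tumble window appends one new row to the result
select tumble_start(order_time, interval '5' minute) as window_start,
       count(*) as cnt
from orders
group by tumble(order_time, interval '5' minute)
```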
    - ## Flink UDF -You can use Flink scala UDF or Python UDF in sql. UDF for batch and streaming sql is the same. Here're 2 examples. +There are 4 ways you can define UDF in Zeppelin. + +* Write Scala UDF +* Write PyFlink UDF +* Create UDF via SQL +* Configure udf jar via flink.udf.jars -* Scala UDF +### Scala UDF ```scala %flink @@ -370,11 +635,16 @@ You can use Flink scala UDF or Python UDF in sql. UDF for batch and streaming sq class ScalaUpper extends ScalarFunction { def eval(str: String) = str.toUpperCase } -btenv.registerFunction("scala_upper", new ScalaUpper()) +btenv.registerFunction("scala_upper", new ScalaUpper()) ``` -* Python UDF +It is very straightforward to define scala udf almost the same as what you do in IDE. +After creating udf class, you need to register it via `btenv`. +You can also register it via `stenv` which share the same Catalog with `btenv`. + + +### Python UDF ```python @@ -387,54 +657,78 @@ class PythonUpper(ScalarFunction): bt_env.register_function("python_upper", udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING())) ``` +It is also very straightforward to define Python udf almost the same as what you do in IDE. +After creating udf class, you need to register it via `bt_env`. +You can also register it via `st_env` which share the same Catalog with `bt_env`. -Zeppelin only supports scala and python for flink interpreter, if you want to write a java udf or the udf is pretty complicated which make it not suitable to write in Zeppelin, -then you can write the udf in IDE and build an udf jar. -In Zeppelin you just need to specify `flink.udf.jars` to this jar, and flink -interpreter will detect all the udfs in this jar and register all the udfs to TableEnvironment, the udf name is the class name. +### UDF via SQL -## PyFlink(%flink.pyflink) -In order to use PyFlink in Zeppelin, you just need to do the following configuration. -* Install apache-flink (e.g. pip install apache-flink) -* Set `zeppelin.pyflink.python` to the python executable where apache-flink is installed in case you have multiple python installed. -* Copy flink-python_2.11–1.10.0.jar from flink opt folder to flink lib folder +Some simple udf can be written in Zeppelin. But if the udf logic is very complicated, +then it is better to write it in IDE, then register it in Zeppelin as following -And PyFlink will create 6 variables for you: +```sql +%flink.ssql + +CREATE FUNCTION myupper AS 'org.apache.zeppelin.flink.udf.JavaUpper'; +``` + +But this kind of approach requires the udf jar must be on `CLASSPATH`, +so you need to configure `flink.execution.jars` to include this udf jar on `CLASSPATH`, such as following: + +``` +%flink.conf + +flink.execution.jars /usr/lib/flink-udf-1.0-SNAPSHOT.jar +``` + +### flink.udf.jars + +The above 3 approaches all have some limitations: + +* It is suitable to write simple Scala udf or Python udf in Zeppelin, but not suitable to write very complicated udf in Zeppelin. Because notebook doesn't provide advanced features compared to IDE, such as package management, code navigation and etc. +* It is not easy to share the udf between notes or users, you have to run the paragraph of defining udf in each flink interpreter. + +So when you have many udfs or udf logic is very complicated and you don't want to register them by yourself every time, then you can use `flink.udf.jars` + +* Step 1. Create a udf project in your IDE, write your udf there. +* Step 2. 
Set `flink.udf.jars` to point to the udf jar you build from your udf project + +For example, + +``` +%flink.conf + +flink.execution.jars /usr/lib/flink-udf-1.0-SNAPSHOT.jar +``` + +Zeppelin would scan this jar, find out all the udf classes and then register them automatically for you. +The udf name is the class name. For example, here's the output of show functions after specifing the above udf jars in `flink.udf.jars` + + + +By default, Zeppelin would scan all the classes in this jar, +so it would be pretty slow if your jar is very big specially when your udf jar has other dependencies. +So in this case we would recommend you to specify `flink.udf.jars.packages` to specify the package to scan, +this can reduce the number of classes to scan and make the udf detection much faster. + + +## How to use Hive + +In order to use Hive in Flink, you have to make the following settings. + +* Set `zeppelin.flink.enableHive` to be true +* Set `zeppelin.flink.hive.version` to be the hive version you are using. +* Set `HIVE_CONF_DIR` to be the location where `hive-site.xml` is located. Make sure hive metastore is started and you have configured `hive.metastore.uris` in `hive-site.xml` +* Copy the following dependencies to the lib folder of flink installation. + * flink-connector-hive_2.11–*.jar + * flink-hadoop-compatibility_2.11–*.jar + * hive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303–0.9.2.jar and libthrift-0.9.2.jar) -* `s_env` (StreamExecutionEnvironment), -* `b_env` (ExecutionEnvironment) -* `st_env` (StreamTableEnvironment for blink planner) -* `bt_env` (BatchTableEnvironment for blink planner) -* `st_env_2` (StreamTableEnvironment for flink planner) -* `bt_env_2` (BatchTableEnvironment for flink planner) - -### IPython Support(%flink.ipyflink) - -By default, zeppelin would use IPython in `%flink.pyflink` when IPython is available, Otherwise it would fall back to the original python implementation. -For the IPython features, you can refer doc[Python Interpreter](python.html) - -## ZeppelinContext -Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities. -See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. You can use `z` to display both flink DataSet and batch/stream table. - -* Display DataSet -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_z_dataset.png) -
    - -* Display Batch Table -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_z_batch_table.png) -
    -* Display Stream Table -
    - ![Interactive Help]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/flink_z_stream_table.gif) -
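As a small, non-authoritative example of the `z.show` usage illustrated by the screenshots above (the data here is made up), assuming the pre-created `benv` and `z` variables described earlier:

```scala
%flink

// build a tiny DataSet of tuples and render it as a Zeppelin table
val ds = benv.fromElements((1, "hello"), (2, "world"))
z.show(ds)
```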
    ## Paragraph local properties In the section of `Streaming Data Visualization`, we demonstrate the different visualization type via paragraph local properties: `type`. -In this section, we will list and explain all the supported local properties in flink interpreter. +In this section, we will list and explain all the supported local properties in Flink interpreter. @@ -498,5 +792,8 @@ In this section, we will list and explain all the supported local properties in Zeppelin is shipped with several Flink tutorial notes which may be helpful for you. You can check for more features in the tutorial notes. +## Community + +[Join our community](http://zeppelin.apache.org/community.html) to discuss with others. diff --git a/docs/interpreter/geode.md b/docs/interpreter/geode.md deleted file mode 100644 index 436c308c5c1..00000000000 --- a/docs/interpreter/geode.md +++ /dev/null @@ -1,218 +0,0 @@ ---- -layout: page -title: "Geode/Gemfire OQL Interpreter for Apache Zeppelin" -description: "Apache Geode (incubating) provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing." -group: interpreter ---- - -{% include JB/setup %} - -# Geode/Gemfire OQL Interpreter for Apache Zeppelin - -
    - -## Overview -
    - - - - - - - - - - -
| Name | Class | Description |
| --- | --- | --- |
| %geode.oql | GeodeOqlInterpreter | Provides OQL environment for Apache Geode |
    - -This interpreter supports the [Geode](http://geode.incubator.apache.org/) [Object Query Language (OQL)](http://geode-docs.cfapps.io/docs/developing/querying_basics/oql_compared_to_sql.html). -With the OQL-based querying language: - -[zeppelin-view](https://www.youtube.com/watch?v=zvzzA9GXu3Q) - -* You can query on any arbitrary object -* You can navigate object collections -* You can invoke methods and access the behavior of objects -* Data mapping is supported -* You are not required to declare types. Since you do not need type definitions, you can work across multiple languages -* You are not constrained by a schema - -This [Video Tutorial](https://www.youtube.com/watch?v=zvzzA9GXu3Q) illustrates some of the features provided by the `Geode Interpreter`. - -## Create Interpreter -By default Zeppelin creates one `Geode/OQL` instance. You can remove it or create more instances. - -Multiple Geode instances can be created, each configured to the same or different backend Geode cluster. -But over time a `Notebook` can have only one Geode interpreter instance `bound`. -That means you _cannot_ connect to different Geode clusters in the same `Notebook`. -This is a known Zeppelin limitation. - -To create new Geode instance open the `Interpreter` section and click the `+Create` button. -Pick a `Name` of your choice and from the `Interpreter` drop-down select `geode`. -Then follow the configuration instructions and `Save` the new instance. - -> Note: The `Name` of the instance is used only to distinguish the instances while binding them to the `Notebook`. The `Name` is irrelevant inside the `Notebook`. In the `Notebook` you must use `%geode.oql` tag. - -## Bind to Notebook -In the `Notebook` click on the `settings` icon in the top right corner. -The select/deselect the interpreters to be bound with the `Notebook`. - -## Configuration -You can modify the configuration of the Geode from the `Interpreter` section. -The Geode interpreter expresses the following properties: - - - - - - - - - - - - - - - - - - - - - - -
| Property Name | Description | Default Value |
| --- | --- | --- |
| geode.locator.host | The Geode Locator Host | localhost |
| geode.locator.port | The Geode Locator Port | 10334 |
| geode.max.result | Max number of OQL result to display to prevent the browser overload | 1000 |
    - -## How to use -> *Tip 1: Use (CTRL + .) for OQL auto-completion.* - -> *Tip 2: Always start the paragraphs with the full `%geode.oql` prefix tag! The short notation: `%geode` would still be able run the OQL queries but the syntax highlighting and the auto-completions will be disabled.* - -### Create / Destroy Regions - -The OQL specification does not support [Geode Regions](https://cwiki.apache.org/confluence/display/GEODE/Index#Index-MainConceptsandComponents) mutation operations. -To `create`/`destroy` regions one should use the [GFSH](http://geode-docs.cfapps.io/docs/tools_modules/gfsh/chapter_overview.html) shell tool instead. -In the following it is assumed that the GFSH is colocated with Zeppelin server. - -```bash -%sh -source /etc/geode/conf/geode-env.sh -gfsh << EOF - - connect --locator=ambari.localdomain[10334] - - destroy region --name=/regionEmployee - destroy region --name=/regionCompany - create region --name=regionEmployee --type=REPLICATE - create region --name=regionCompany --type=REPLICATE - - exit; -EOF -``` - -Above snippet re-creates two regions: `regionEmployee` and `regionCompany`. -Note that you have to explicitly specify the locator host and port. -The values should match those you have used in the Geode Interpreter configuration. -Comprehensive list of [GFSH Commands by Functional Area](http://geode-docs.cfapps.io/docs/tools_modules/gfsh/gfsh_quick_reference.html). - -### Basic OQL -```sql -%geode.oql -SELECT count(*) FROM /regionEmployee -``` - -OQL `IN` and `SET` filters: - -```sql -%geode.oql -SELECT * FROM /regionEmployee -WHERE companyId IN SET(2) OR lastName IN SET('Tzolov13', 'Tzolov73') -``` - -OQL `JOIN` operations - -```sql -%geode.oql -SELECT e.employeeId, e.firstName, e.lastName, c.id as companyId, c.companyName, c.address -FROM /regionEmployee e, /regionCompany c -WHERE e.companyId = c.id -``` - -By default the QOL responses contain only the region entry values. To access the keys, query the `EntrySet` instead: - -```sql -%geode.oql -SELECT e.key, e.value.companyId, e.value.email -FROM /regionEmployee.entrySet e -``` -Following query will return the EntrySet value as a Blob: - -```sql -%geode.oql -SELECT e.key, e.value FROM /regionEmployee.entrySet e -``` - -> Note: You can have multiple queries in the same paragraph but only the result from the first is displayed. [[1](https://issues.apache.org/jira/browse/ZEPPELIN-178)], [[2](https://issues.apache.org/jira/browse/ZEPPELIN-212)]. - -### GFSH Commands From The Shell -Use the Shell Interpreter (`%sh`) to run OQL commands form the command line: - -```bash -%sh -source /etc/geode/conf/geode-env.sh -gfsh -e "connect" -e "list members" -``` - -### Apply Zeppelin Dynamic Forms -You can leverage [Zeppelin Dynamic Form](../usage/dynamic_form/intro.html) inside your OQL queries. You can use both the `text input` and `select form` parameterization features - -```sql -%geode.oql -SELECT * FROM /regionEmployee e WHERE e.employeeId > ${Id} -``` - -### Auto-completion -The Geode Interpreter provides a basic auto-completion functionality. On `(Ctrl+.)` it list the most relevant suggestions in a pop-up window. 
- -## Geode REST API -To list the defined regions you can use the [Geode REST API](http://geode-docs.cfapps.io/docs/geode_rest/chapter_overview.html): - -``` -http://phd1.localdomain:8484/gemfire-api/v1/ -``` - -```json -{ - "regions" : [{ - "name" : "regionEmployee", - "type" : "REPLICATE", - "key-constraint" : null, - "value-constraint" : null - }, { - "name" : "regionCompany", - "type" : "REPLICATE", - "key-constraint" : null, - "value-constraint" : null - }] -} -``` - -> To enable Geode REST API with JSON support add the following properties to geode.server.properties.file and restart: - -``` -http-service-port=8484 -start-dev-rest-api=true -``` diff --git a/docs/interpreter/hazelcastjet.md b/docs/interpreter/hazelcastjet.md deleted file mode 100644 index 06ebc888ac9..00000000000 --- a/docs/interpreter/hazelcastjet.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -layout: page -title: Hazelcast Jet interpreter in Apache Zeppelin -description: Build and execture Hazelcast Jet computation jobs. -group: interpreter ---- - - -{% include JB/setup %} - -# Hazelcast Jet interpreter for Apache Zeppelin - -
    - -## Overview -[Hazelcast Jet](https://jet.hazelcast.org) is an open source application embeddable, distributed computing engine for In-Memory Streaming and Fast Batch Processing built on top of Hazelcast In-Memory Data Grid (IMDG). -With Hazelcast IMDG providing storage functionality, Hazelcast Jet performs parallel execution to enable data-intensive applications to operate in near real-time. - -## Why Hazelcast Jet? -There are plenty of solutions which can solve some of these issues, so why choose Hazelcast Jet? -When speed and simplicity is important. - -Hazelcast Jet gives you all the infrastructure you need to build a distributed data processing pipeline within one 10Mb Java JAR: processing, storage and clustering. - -As it is built on top of Hazelcast IMDG, Hazelcast Jet comes with in-memory operational storage that’s available out-of-the box. This storage is partitioned, distributed and replicated across the Hazelcast Jet cluster for capacity and resiliency. It can be used as an input data buffer, to publish the results of a Hazelcast Jet computation, to connect multiple Hazelcast Jet jobs or as a lookup cache for data enrichment. - -## How to use the Hazelcast Jet interpreter -Basically, you can write normal java code. You should write the main method inside a class because the interpreter invoke this main to execute the code. Unlike Zeppelin normal pattern, each paragraph is considered as a separate job, there isn't any relation to any other paragraph. For example, a variable defined in one paragraph cannot be used in another one as each paragraph is a self contained java main class that is executed and the output returned to Zeppelin. - -The following is a demonstration of a word count example with the result represented as an Hazelcast IMDG IMap sink and displayed leveraging Zeppelin's built in visualization using the utility method `JavaInterpreterUtils.displayTableFromSimpleMap`. - -```java -%hazelcastjet - -import com.hazelcast.jet.Jet; -import com.hazelcast.jet.JetInstance; -import com.hazelcast.jet.core.DAG; -import com.hazelcast.jet.pipeline.Pipeline; -import com.hazelcast.jet.pipeline.Sinks; -import com.hazelcast.jet.pipeline.Sources; - -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import org.apache.zeppelin.java.JavaInterpreterUtils; - -import static com.hazelcast.jet.Traversers.traverseArray; -import static com.hazelcast.jet.aggregate.AggregateOperations.counting; -import static com.hazelcast.jet.function.DistributedFunctions.wholeItem; - -public class DisplayTableFromSimpleMapExample { - - public static void main(String[] args) { - - // Create the specification of the computation pipeline. Note - // it's a pure POJO: no instance of Jet needed to create it. 
- Pipeline p = Pipeline.create(); - p.drawFrom(Sources.list("text")) - .flatMap(word -> - traverseArray(word.toLowerCase().split("\\W+"))) - .filter(word -> !word.isEmpty()) - .groupingKey(wholeItem()) - .aggregate(counting()) - .drainTo(Sinks.map("counts")); - - // Start Jet, populate the input list - JetInstance jet = Jet.newJetInstance(); - try { - List text = jet.getList("text"); - text.add("hello world hello hello world"); - text.add("world world hello world"); - - // Perform the computation - jet.newJob(p).join(); - - // Diplay the results with Zeppelin %table - Map counts = jet.getMap("counts"); - System.out.println(JavaInterpreterUtils.displayTableFromSimpleMap("Word","Count", counts)); - - } finally { - Jet.shutdownAll(); - } - - } - -} -``` - -The following is a demonstration where the Hazelcast DAG (directed acyclic graph) is displayed as a graph leveraging Zeppelin's built in visualization using the utility method `HazelcastJetInterpreterUtils.displayNetworkFromDAG`. -This is particularly useful to understand how the high level Pipeline is then converted to the Jet’s low-level Core API. - -```java -%hazelcastjet - -import com.hazelcast.jet.pipeline.Pipeline; -import com.hazelcast.jet.pipeline.Sinks; -import com.hazelcast.jet.pipeline.Sources; - -import org.apache.zeppelin.hazelcastjet.HazelcastJetInterpreterUtils; - -import static com.hazelcast.jet.Traversers.traverseArray; -import static com.hazelcast.jet.aggregate.AggregateOperations.counting; -import static com.hazelcast.jet.function.DistributedFunctions.wholeItem; - -public class DisplayNetworkFromDAGExample { - - public static void main(String[] args) { - - // Create the specification of the computation pipeline. Note - // it's a pure POJO: no instance of Jet needed to create it. - Pipeline p = Pipeline.create(); - p.drawFrom(Sources.list("text")) - .flatMap(word -> - traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing") - .filter(word -> !word.isEmpty()) - .groupingKey(wholeItem()) - .aggregate(counting()) - .drainTo(Sinks.map("counts")); - - // Diplay the results with Zeppelin %network - System.out.println(HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())); - - } - -} -``` - -Note -- By clicking on a node of the graph, the node type is displayed (either Source, Sink or Transform). This is also visually represented with colors (Sources and Sinks are blue, Transforms are orange). -- By clicking on an edge of the graph, the following details are shown: routing (UNICAST, PARTITIONED, ISOLATED, BROADCAST), distributed (true or false), priority (int). diff --git a/docs/interpreter/hbase.md b/docs/interpreter/hbase.md index fd6334acebc..50228407c92 100644 --- a/docs/interpreter/hbase.md +++ b/docs/interpreter/hbase.md @@ -32,14 +32,14 @@ By default, Zeppelin is built against HBase 1.0.x releases. 
To work with HBase 1 ```bash # HBase 1.1.4 -mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0 +./mvnw clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0 ``` To work with HBase 1.2.0+, use the following build command: ```bash # HBase 1.2.0 -mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0 +./mvnw clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0 ``` ## Configuration diff --git a/docs/interpreter/hive.md b/docs/interpreter/hive.md index 86602fcc27f..94e49ce6a4d 100644 --- a/docs/interpreter/hive.md +++ b/docs/interpreter/hive.md @@ -25,7 +25,7 @@ limitations under the License. ## Important Notice -Hive Interpreter will be deprecated and merged into JDBC Interpreter. +Hive Interpreter has been deprecated and merged into JDBC Interpreter. You can use Hive Interpreter by using JDBC Interpreter with same functionality. See the example below of settings and dependencies. @@ -36,19 +36,19 @@ See the example below of settings and dependencies. Value - hive.driver + default.driver org.apache.hive.jdbc.HiveDriver - hive.url + default.url jdbc:hive2://localhost:10000 - hive.user + default.user hiveUser - hive.password + default.password hivePassword @@ -103,34 +103,22 @@ See the example below of settings and dependencies. ( Optional ) Other properties used by the driver - ${prefix}.driver - - Driver class path of %hive(${prefix}) - - - ${prefix}.url - - Url of %hive(${prefix}) + zeppelin.jdbc.hive.timeout.threshold + 60000 + Timeout for hive job timeout - ${prefix}.user - - ( Optional ) Username of the connection of %hive(${prefix}) + zeppelin.jdbc.hive.monitor.query_interval + 1000 + Query interval for hive statement - ${prefix}.password - - ( Optional ) Password of the connection of %hive(${prefix}) - - - ${prefix}.xxx - - ( Optional ) Other properties used by the driver of %hive(${prefix}) + zeppelin.jdbc.hive.engines.tag.enable + true + Set application tag for applications started by hive engines -This interpreter provides multiple configuration with `${prefix}`. User can set a multiple connection properties by this prefix. It can be used like `%hive(${prefix})`. - ## Overview The [Apache Hive](https://hive.apache.org/) ™ data warehouse software facilitates querying and managing large datasets @@ -147,14 +135,6 @@ Basically, you can use select * from my_table; ``` -or - -```sql -%hive(etl) --- 'etl' is a ${prefix} -select * from my_table; -``` - You can also run multiple queries up to 10 by default. Changing these settings is not implemented yet. ### Apply Zeppelin Dynamic Forms diff --git a/docs/interpreter/ignite.md b/docs/interpreter/ignite.md deleted file mode 100644 index 40c56b89f46..00000000000 --- a/docs/interpreter/ignite.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -layout: page -title: "Ignite Interpreter for Apache Zeppelin" -description: "Apache Ignite in-memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies." -group: interpreter ---- - -{% include JB/setup %} - -# Ignite Interpreter for Apache Zeppelin - -
    - -## Overview -[Apache Ignite](https://ignite.apache.org/) In-Memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies. - -![Apache Ignite]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-logo.png) - -You can use Zeppelin to retrieve distributed data from cache using Ignite SQL interpreter. Moreover, Ignite interpreter allows you to execute any Scala code in cases when SQL doesn't fit to your requirements. For example, you can populate data into your caches or execute distributed computations. - -## Installing and Running Ignite example -In order to use Ignite interpreters, you may install Apache Ignite in some simple steps: - -1. Ignite provides examples only with source or binary release. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. The supported Ignite version is specified in [Supported Interpreter table](https://zeppelin.apache.org/supported_interpreters.html#ignite) for each Zeppelin release. If you're using Zeppelin master branch, please see `ignite.version` in `path/to/your-Zeppelin/ignite/pom.xml`. -2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided `/apache-ignite-fabric-{version}-bin/examples/pom.xml` file into your favourite IDE, such as Eclipse. - -* In case of Eclipse, Eclipse -> File -> Import -> Existing Maven Projects -* Set examples directory path to Eclipse and select the pom.xml. -* Then start `org.apache.ignite.examples.ExampleNodeStartup` (or whatever you want) to run at least one or more ignite node. When you run example code, you may notice that the number of node is increase one by one. - -> **Tip. If you want to run Ignite examples on the cli not IDE, you can export executable Jar file from IDE. Then run it by using below command.** - -```bash -nohup java -jar -``` - -## Configuring Ignite Interpreter -At the "Interpreters" menu, you may edit Ignite interpreter or create new one. Zeppelin provides these properties for Ignite. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Property Name | Value | Description |
| --- | --- | --- |
| ignite.addresses | 127.0.0.1:47500..47509 | Comma separated list of Ignite cluster hosts. See Ignite Cluster Configuration section for more details. |
| ignite.clientMode | true | You can connect to the Ignite cluster as client or server node. See Ignite Clients vs. Servers section for details. Use true or false values in order to connect in client or server mode respectively. |
| ignite.config.url | | Configuration URL. Overrides all other settings. |
| ignite.jdbc.url | jdbc:ignite:cfg://default-ignite-jdbc.xml | Ignite JDBC connection URL. |
| ignite.peerClassLoadingEnabled | true | Enables peer-class-loading. See Zero Deployment section for details. Use true or false values in order to enable or disable P2P class loading respectively. |
    - -![Configuration of Ignite Interpreter]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-interpreter-setting.png) - -## How to use -After configuring Ignite interpreter, create your own notebook. Then you can bind interpreters like below image. - -![Binding Interpreters]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-interpreter-binding.png) - -For more interpreter binding information see [here](../usage/interpreter/overview.html#what-is-interpreter-setting). - -### Ignite SQL interpreter -In order to execute SQL query, use ` %ignite.ignitesql ` prefix.
    -Supposing you are running `org.apache.ignite.examples.streaming.wordcount.StreamWords`, then you can use "words" cache( Of course you have to specify this cache name to the Ignite interpreter setting section `ignite.jdbc.url` of Zeppelin ). -For example, you can select top 10 words in the words cache using the following query - -```sql -%ignite.ignitesql -select _val, count(_val) as cnt from String group by _val order by cnt desc limit 10 -``` - -![IgniteSql on Zeppelin]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-sql-example.png) - -As long as your Ignite version and Zeppelin Ignite version is same, you can also use scala code. Please check the Zeppelin Ignite version before you download your own Ignite. - -```scala -%ignite -import org.apache.ignite._ -import org.apache.ignite.cache.affinity._ -import org.apache.ignite.cache.query._ -import org.apache.ignite.configuration._ - -import scala.collection.JavaConversions._ - -val cache: IgniteCache[AffinityUuid, String] = ignite.cache("words") - -val qry = new SqlFieldsQuery("select avg(cnt), min(cnt), max(cnt) from (select count(_val) as cnt from String group by _val)", true) - -val res = cache.query(qry).getAll() - -collectionAsScalaIterable(res).foreach(println _) -``` - -![Using Scala Code]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ignite-scala-example.png) - -Apache Ignite also provides a guide docs for Zeppelin ["Ignite with Apache Zeppelin"](https://apacheignite-sql.readme.io/docs/apache-zeppelin) diff --git a/docs/interpreter/jdbc.md b/docs/interpreter/jdbc.md index d556160a0da..152117fe75e 100644 --- a/docs/interpreter/jdbc.md +++ b/docs/interpreter/jdbc.md @@ -1,7 +1,7 @@ --- layout: page title: "Generic JDBC Interpreter for Apache Zeppelin" -description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter." +description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Presto/Trino, Impala, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter." group: interpreter --- - -{% include JB/setup %} - -# Kotlin interpreter for Apache Zeppelin - -
    - -## Overview -Kotlin is a cross-platform, statically typed, general-purpose programming language with type inference. -It is designed to interoperate fully with Java, and the JVM version of its standard library depends on the Java Class Library, but type inference allows its syntax to be more concise. - -## Configuration - - - - - - - - - - - - - - - - - - - - -
| Name | Default | Description |
| --- | --- | --- |
| zeppelin.kotlin.maxResult | 1000 | Max number of results to display |
| zeppelin.kotlin.shortenTypes | true | Display shortened types instead of full, e.g. Int vs kotlin.Int |
    - -## Example - -```kotlin -%kotlin - -fun square(n: Int): Int = n * n -``` - -## Kotlin Context -Kotlin context is accessible via `kc` object bound to the interpreter. -It holds `vars` and `functions` fields that return all user-defined variables and functions present in the interpreter. -You can also print variables or functions by calling `kc.showVars()` or `kc.showFunctions()`. - -### Example - - -```kotlin -fun square(n: Int): Int = n * n - -val greeter = { s: String -> println("Hello $s!") } -val l = listOf("Drive", "to", "develop") - -kc.showVars() -kc.showFunctions() -``` -Output: -``` -l: List = [Drive, to, develop] -greeter: (String) -> Unit = (kotlin.String) -> kotlin.Unit -fun square(Int): Int -``` diff --git a/docs/interpreter/ksql.md b/docs/interpreter/ksql.md deleted file mode 100644 index bc91ade6418..00000000000 --- a/docs/interpreter/ksql.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -layout: page -title: "KSQL Interpreter for Apache Zeppelin" -description: "SQL is the streaming SQL engine for Apache Kafka and provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka." -group: interpreter ---- - -{% include JB/setup %} - -# KSQL Interpreter for Apache Zeppelin - -
    - -## Overview -[KSQL](https://www.confluent.io/product/ksql/) is the streaming SQL engine for Apache Kafka®. It provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka, - -## Configuration - - - - - - - - - - - - - - - -
| Property | Default | Description |
| --- | --- | --- |
| ksql.url | http://localhost:8080 | The KSQL Endpoint base URL |
    - -N.b. The interpreter supports all the KSQL properties, i.e. `ksql.streams.auto.offset.reset`. -The full list of KSQL parameters is [here](https://docs.confluent.io/current/ksql/docs/installation/server-config/config-reference.html). - -## Using the KSQL Interpreter -In a paragraph, use `%ksql` and start your SQL query in order to start to interact with KSQL. - -Following some examples: - -``` -%ksql -PRINT 'orders'; -``` - -![PRINT image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.1.gif) - -``` -%ksql -CREATE STREAM ORDERS WITH - (VALUE_FORMAT='AVRO', - KAFKA_TOPIC ='orders'); -``` - -![CREATE image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.1.gif) - -``` -%ksql -SELECT * -FROM ORDERS -LIMIT 10 -``` - -![LIMIT image]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/ksql.3.gif) \ No newline at end of file diff --git a/docs/interpreter/kylin.md b/docs/interpreter/kylin.md deleted file mode 100644 index 1f2b0f3ab44..00000000000 --- a/docs/interpreter/kylin.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -layout: page -title: "Apache Kylin Interpreter for Apache Zeppelin" -description: "Apache Kylin™ is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, original contributed from eBay Inc. -." -group: interpreter ---- - -{% include JB/setup %} - -# Apache Kylin Interpreter for Apache Zeppelin - -
    - -## Overview -[Apache Kylin](https://kylin.apache.org/) is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, original contributed from eBay Inc. The interpreter assumes that Apache Kylin has been installed and you can connect to Apache Kylin from the machine Apache Zeppelin is installed. -To get start with Apache Kylin, please see [Apache Kylin Quickstart](https://kylin.apache.org/docs15/index.html). - -## Configuration - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | Default | Description |
| --- | --- | --- |
| kylin.api.url | http://localhost:7070/kylin/api/query | kylin query POST API. The format can be like `http://<host>:<port>/kylin/api/query` |
| kylin.api.user | ADMIN | kylin user |
| kylin.api.password | KYLIN | kylin password |
| kylin.query.project | learn_kylin | String, project to perform query. Can be updated at the notebook level |
| kylin.query.ispartial | true | true or false (@Deprecated since Apache Kylin V1.5). Whether to accept a partial result or not, default is "false". Set to "false" for production use. |
| kylin.query.limit | 5000 | int, query limit. If limit is set in sql, perPage will be ignored. |
| kylin.query.offset | 0 | int, query offset. If offset is set in sql, curIndex will be ignored. |
    - -## Using the Apache Kylin Interpreter -In a paragraph, use `%kylin(project_name)` to select the **kylin** interpreter, **project name** and then input **sql**. If no project name defined, will use the default project name from the above configuration. - -```sql -%kylin(learn_project) -select count(*) from kylin_sales group by part_dt -``` - diff --git a/docs/interpreter/lens.md b/docs/interpreter/lens.md deleted file mode 100644 index cd00d1ca769..00000000000 --- a/docs/interpreter/lens.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -layout: page -title: "Lens Interpreter for Apache Zeppelin" -description: "Apache Lens provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one." -group: interpreter ---- - -{% include JB/setup %} - -# Lens Interpreter for Apache Zeppelin - -
    - -## Overview -[Apache Lens](https://lens.apache.org/) provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one. - -![Apache Lens]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-logo.png) - -## Installing and Running Lens -In order to use Lens interpreters, you may install Apache Lens in some simple steps: - -1. Download Lens for latest version from [the ASF](http://www.apache.org/dyn/closer.lua/lens/2.3-beta). Or the older release can be found [in the Archives](http://archive.apache.org/dist/lens/). -2. Before running Lens, you have to set HIVE_HOME and HADOOP_HOME. If you want to get more information about this, please refer to [here](http://lens.apache.org/lenshome/install-and-run.html#Installation). Lens also provides Pseudo Distributed mode. [Lens pseudo-distributed setup](http://lens.apache.org/lenshome/pseudo-distributed-setup.html) is done by using [docker](https://www.docker.com/). Hive server and hadoop daemons are run as separate processes in lens pseudo-distributed setup. -3. Now, you can start lens server (or stop). - -```bash -./bin/lens-ctl start # (or stop) -``` - -## Configuring Lens Interpreter -At the "Interpreters" menu, you can edit Lens interpreter or create new one. Zeppelin provides these properties for Lens. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Property Name | Value | Description |
| --- | --- | --- |
| lens.client.dbname | default | The database schema name |
| lens.query.enable.persistent.resultset | false | Whether to enable persistent resultset for queries. When enabled, the server will fetch results from the driver, custom-format them if needed and store them in a configured location. The file name of the query output is the query handle id, with configured extensions |
| lens.server.base.url | http://hostname:port/lensapi | The base url for the lens server. Edit "hostname" and "port" as appropriate (e.g. http://0.0.0.0:9999/lensapi) |
| lens.session.cluster.user | default | Hadoop cluster username |
| zeppelin.lens.maxResult | 1000 | Max number of rows to display |
| zeppelin.lens.maxThreads | 10 | If concurrency is true, how many threads to use |
| zeppelin.lens.run.concurrent | true | Run concurrent Lens sessions |
| xxx | yyy | Anything else from [Configuring lens server](https://lens.apache.org/admin/config-server.html) |
    - -![Apache Lens Interpreter Setting]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-interpreter-setting.png) - -### Interpreter Binding for Zeppelin Notebook -After configuring Lens interpreter, create your own notebook, then you can bind interpreters like below image. - -![Zeppelin Notebook Interpreter Binding]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-interpreter-binding.png) - -For more interpreter binding information see [here](../usage/interpreter/overview.html#interpreter-binding-mode). - -### How to use -You can analyze your data by using [OLAP Cube](http://lens.apache.org/user/olap-cube.html) [QL](http://lens.apache.org/user/cli.html) which is a high level SQL like language to query and describe data sets organized in data cubes. -You may experience OLAP Cube like this [Video tutorial](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples). -As you can see in this video, they are using Lens Client Shell(`./bin/lens-cli.sh`). All of these functions also can be used on Zeppelin by using Lens interpreter. - -
  • Create and Use (Switch) Databases. - -```sql -create database newDb -``` - -``` -use newDb -``` - -
  • Create Storage. - -``` -create storage your/path/to/lens/client/examples/resources/db-storage.xml -``` - -
  • Create Dimensions, Show fields and join-chains of them. - -``` -create dimension your/path/to/lens/client/examples/resources/customer.xml -``` - -``` -dimension show fields customer -``` - -``` -dimension show joinchains customer -``` - -
  • Create Caches, Show fields and join-chains of them. - -``` -create cube your/path/to/lens/client/examples/resources/sales-cube.xml -``` - -``` -cube show fields sales -``` - -``` -cube show joinchains sales -``` - -
  • Create Dimtables and Fact. - -``` -create dimtable your/path/to/lens/client/examples/resources/customer_table.xml -``` - -``` -create fact your/path/to/lens/client/examples/resources/sales-raw-fact.xml -``` - -
  • Add partitions to Dimtable and Fact. - -``` -dimtable add single-partition --dimtable_name customer_table --storage_name local ---path your/path/to/lens/client/examples/resources/customer-local-part.xml -``` - -``` -fact add partitions --fact_name sales_raw_fact --storage_name local ---path your/path/to/lens/client/examples/resources/sales-raw-local-parts.xml -``` - -
  • Now, you can run queries on cubes. - -``` -query execute cube select customer_city_name, product_details.description, -product_details.category, product_details.color, store_sales from sales -where time_range_in(delivery_time, '2015-04-11-00', '2015-04-13-00') -``` - -![Lens Query Result]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-result.png) - -These are just examples that provided in advance by Lens. If you want to explore whole tutorials of Lens, see the [tutorial video](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples). - -## Lens UI Service -Lens also provides web UI service. Once the server starts up, you can open the service on http://serverhost:19999/index.html and browse. You may also check the structure that you made and use query easily here. - -![Lens UI Service]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/lens-ui-service.png) diff --git a/docs/interpreter/markdown.md b/docs/interpreter/markdown.md index e06c563e47a..a9c830652db 100644 --- a/docs/interpreter/markdown.md +++ b/docs/interpreter/markdown.md @@ -25,11 +25,11 @@ limitations under the License. ## Overview [Markdown](http://daringfireball.net/projects/markdown/) is a plain text formatting syntax designed so that it can be converted to HTML. -Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java), [pegdown](https://github.com/sirthias/pegdown) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers. +Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers. In Zeppelin notebook, you can use ` %md ` in the beginning of a paragraph to invoke the Markdown interpreter and generate static html from Markdown plain text. -In Zeppelin, Markdown interpreter is enabled by default and uses the [pegdown](https://github.com/sirthias/pegdown) parser. +In Zeppelin, Markdown interpreter is enabled by default and uses the [flexmark](https://github.com/vsch/flexmark-java) parser. @@ -54,7 +54,7 @@ For more information, please see [Mathematical Expression](../usage/display_syst markdown.parser.type flexmark - Markdown Parser Type.
    Available values: flexmark, pegdown, markdown4j. + Markdown Parser Type.
    Available values: flexmark, markdown4j. @@ -68,13 +68,8 @@ CommonMark/Markdown Java parser with source level AST. -### Pegdown Parser - -`pegdown` parser provides github flavored markdown. Although still one of the most popular Markdown parsing libraries for the JVM, pegdown has reached its end of life. -The project is essentially unmaintained with tickets piling up and crucial bugs not being fixed.`pegdown`'s parsing performance isn't great. But keep this parser for the backward compatibility. - ### Markdown4j Parser -Since `pegdown` parser is more accurate and provides much more markdown syntax `markdown4j` option might be removed later. But keep this parser for the backward compatibility. +Since `flexmark` parser is more accurate and provides much more markdown syntax `markdown4j` option might be removed later. But keep this parser for the backward compatibility. diff --git a/docs/interpreter/pig.md b/docs/interpreter/pig.md deleted file mode 100644 index e640b34852e..00000000000 --- a/docs/interpreter/pig.md +++ /dev/null @@ -1,190 +0,0 @@ ---- -layout: page -title: "Pig Interpreter for Apache Zeppelin" -description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs." -group: manual ---- - -{% include JB/setup %} - - -# Pig Interpreter for Apache Zeppelin - -
    - -## Overview -[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of -a high-level language for expressing data analysis programs, -coupled with infrastructure for evaluating these programs. -The salient property of Pig programs is that their structure is amenable to substantial parallelization, -which in turns enables them to handle very large data sets. - -## Supported interpreter type - - `%pig.script` (default Pig interpreter, so you can use `%pig`) - - `%pig.script` is like the Pig grunt shell. Anything you can run in Pig grunt shell can be run in `%pig.script` interpreter, it is used for running Pig script where you don’t need to visualize the data, it is suitable for data munging. - - - `%pig.query` - - `%pig.query` is a little different compared with `%pig.script`. It is used for exploratory data analysis via Pig latin where you can leverage Zeppelin’s visualization ability. There're 2 minor differences in the last statement between `%pig.script` and `%pig.query` - - No pig alias in the last statement in `%pig.query` (read the examples below). - - The last statement must be in single line in `%pig.query` - - -## How to use - -### How to setup Pig execution modes. - -- Local Mode - - Set `zeppelin.pig.execType` as `local`. - -- MapReduce Mode - - Set `zeppelin.pig.execType` as `mapreduce`. HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`. - -- Tez Local Mode - - Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez_local`. - -- Tez Mode - - Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez`. HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`. - -- Spark Local Mode - - Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark_local`. - -- Spark Mode - - Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark`. For now, only yarn-client mode is supported. To enable it, you need to set property `SPARK_MASTER` to yarn-client and set `SPARK_JAR` to the spark assembly jar. - -### How to choose custom Spark Version - -By default, Pig Interpreter would use Spark 1.6.3 built with scala 2.10, if you want to use another spark version or scala version, -you need to rebuild Zeppelin by specifying the custom Spark version via -Dpig.spark.version= and scala version via -Dpig.scala.version= in the maven build command. - -### How to configure interpreter - -At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default. -And you can set any Pig properties here which will be passed to Pig engine. (like tez.queue.name & mapred.job.queue.name). -Besides, we use paragraph title as job name if it exists, else use the last line of Pig script. -So you can use that to find app running in YARN RM UI. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Property | Default | Description
zeppelin.pig.execType | mapreduce | Execution mode for pig runtime: local, mapreduce, tez_local, tez, spark_local, spark
zeppelin.pig.includeJobStats | false | whether display jobStats info in %pig.script
zeppelin.pig.maxResult | 1000 | max row number displayed in %pig.query
tez.queue.name | default | queue name for tez engine
mapred.job.queue.name | default | queue name for mapreduce engine
SPARK_MASTER | local | local or yarn-client
SPARK_JAR | | The spark assembly jar, both jar in local or hdfs is supported. Put it on hdfs could have performance benefit
    - -### Example - -##### pig - -``` -%pig - -bankText = load 'bank.csv' using PigStorage(';'); -bank = foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance; -bank = filter bank by age != '"age"'; -bank = foreach bank generate (int)age, REPLACE(job,'"','') as job, REPLACE(marital, '"', '') as marital, (int)(REPLACE(balance, '"', '')) as balance; -store bank into 'clean_bank.csv' using PigStorage(';'); -- this statement is optional, it just show you that most of time %pig.script is used for data munging before querying the data. -``` - -##### pig.query - -Get the number of each age where age is less than 30 - -``` -%pig.query - -bank_data = filter bank by age < 30; -b = group bank_data by age; -foreach b generate group, COUNT($1); -``` - -The same as above, but use dynamic text form so that use can specify the variable maxAge in textbox. -(See screenshot below). Dynamic form is a very cool feature of Zeppelin, you can refer this [link]((../usage/dynamic_form/intro.html)) for details. - -``` -%pig.query - -bank_data = filter bank by age < ${maxAge=40}; -b = group bank_data by age; -foreach b generate group, COUNT($1) as count; -``` - -Get the number of each age for specific marital type, -also use dynamic form here. User can choose the marital type in the dropdown list (see screenshot below). - -``` -%pig.query - -bank_data = filter bank by marital=='${marital=single,single|divorced|married}'; -b = group bank_data by age; -foreach b generate group, COUNT($1) as count; -``` - -The above examples are in the Pig tutorial note in Zeppelin, you can check that for details. Here's the screenshot. - - - - -Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, -and do different kinds of query based on the data of `%pig`. -Besides, we recommend you to specify alias explicitly so that the visualization can display -the column name correctly. In the above example 2 and 3 of `%pig.query`, we name `COUNT($1)` as `count`. -If you don't do this, then we will name it using position. -E.g. in the above first example of `%pig.query`, we will use `col_1` in chart to represent `COUNT($1)`. - - diff --git a/docs/interpreter/python.md b/docs/interpreter/python.md index 86fb1dbb627..07e37a7b67f 100644 --- a/docs/interpreter/python.md +++ b/docs/interpreter/python.md @@ -36,20 +36,61 @@ Zeppelin supports python language which is very popular in data analytics and ma %python PythonInterpreter - Vanilla python interpreter, with least dependencies, only python environment installed is required + Vanilla python interpreter, with least dependencies, only python environment installed is required, %python will use IPython if its prerequisites are met %python.ipython IPythonInterpreter - Provide more fancy python runtime via IPython, almost the same experience like Jupyter. It requires more things, but is the recommended interpreter for using python in Zeppelin, see below + Provide more fancy python runtime via IPython, almost the same experience like Jupyter. It requires more things, but is the recommended interpreter for using python in Zeppelin, see below for more details %python.sql PythonInterpreterPandasSql - Provide sql capability to query data in Pandas DataFrame via pandasql + Provide sql capability to query data in Pandas DataFrame via pandasql, it can access dataframes in %python +## Main Features + + + + + + + + + + + + + + + + + + + + + + +
Feature | Description
Support vanilla Python and IPython | Vanilla Python only requires a Python installation; IPython provides almost the same user experience as Jupyter, such as inline plotting, code completion and magic methods
Built-in ZeppelinContext Support | You can use ZeppelinContext to visualize pandas dataframes
Support SQL on Pandas dataframes | You can use SQL to query dataframes which are defined in Python
Run Python in yarn cluster with customized Python runtime | You can run Python in a yarn cluster with a customized Python runtime, without interpreters affecting each other
    + +## Play Python in Zeppelin docker + +For beginner, we would suggest you to play Python in Zeppelin docker first. +In the Zeppelin docker image, we have already installed +miniconda and lots of [useful python libraries](https://github.com/apache/zeppelin/blob/branch-0.10/scripts/docker/zeppelin/bin/env_python_3_with_R.yml) +including IPython's prerequisites, so `%python` would use IPython. + +Without any extra configuration, you can run most of tutorial notes under folder `Python Tutorial` directly. + + +```bash +docker run -u $(id -u) -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.10.0 +``` + +After running the above command, you can open `http://localhost:8080` to play Python in Zeppelin. + ## Configuration @@ -80,12 +121,13 @@ Zeppelin supports python language which is very popular in data analytics and ma - + - +
zeppelin.yarn.dist.archives
Comma separated list of archives to be extracted into the working directory of interpreter. e.g. You can specify conda pack archive files via this property in python's yarn mode. It could be either files in local filesystem or files on hadoop compatible file systems
Used for ipython in yarn mode. It is a general zeppelin interpreter configuration, not python specific. For Python interpreter it is used to specify the conda env archive file which could be on local filesystem or on hadoop compatible file system.
zeppelin.interpreter.conda.env.name
conda environment name, aka the folder name in the working directory of interpreter
Used for ipython in yarn mode. conda environment name, aka the folder name in the working directory of interpreter yarn container.
    @@ -143,29 +185,32 @@ z.show(plt, height='150px', fmt='svg') - ## IPython Interpreter (`%python.ipython`) (recommended) -IPython is more powerful than the vanilla python interpreter with extra functionality. You can use IPython with Python2 or Python3 which depends on which python you set in `zeppelin.python`. +IPython is more powerful than the vanilla python interpreter with extra functionality. This is what we recommend you to use instead of vanilla python interpreter. You can use IPython with Python2 or Python3 which depends on which python you set in `zeppelin.python`. -For non-anaconda environment +### Prerequisites - **Prerequisites** - - - Jupyter `pip install jupyter` - - grpcio `pip install grpcio` - - protobuf `pip install protobuf` +* For non-anaconda environment, You need to install the following packages -For anaconda environment (`zeppelin.python` points to the python under anaconda) +``` +pip install jupyter +pip install grpcio +pip install protobuf +``` + +* For anaconda environment (`zeppelin.python` points to the python under anaconda) + +``` +pip install grpcio +pip install protobuf +``` - **Prerequisites** - - - grpcio `pip install grpcio` - - protobuf `pip install protobuf` +Zeppelin will check the above prerequisites when using `%python`, if IPython prerequisites are met, `%python` would use IPython interpreter, +otherwise it would use vanilla Python interpreter in `%python`. In addition to all the basic functions of the vanilla python interpreter, you can use all the IPython advanced features as you use it in Jupyter Notebook. - -e.g. +Take a look at tutorial note `Python Tutorial/1. IPython Basic` and `Python Tutorial/2. IPython Visualization Tutorial` for how to use IPython in Zeppelin. ### Use IPython magic @@ -193,67 +238,76 @@ plt.figure() plt.plot(data) ``` +### Run shell command + +``` +%python.ipython + +!pip install pandas +``` + ### Colored text output - + ### More types of visualization -e.g. IPython supports hvplot + +e.g. You can use hvplot in the same way as in Jupyter, Take a look at tutorial note `Python Tutorial/2. IPython Visualization Tutorial` for more visualization examples. + + ### Better code completion - +Type `tab` can give you all the completion candidates just like in Jupyter. + + -By default, Zeppelin would use IPython in `%python` if IPython prerequisites are meet, otherwise it would use vanilla Python interpreter in `%python`. -If you don't want to use IPython via `%python`, then you can set `zeppelin.python.useIPython` as `false` in interpreter setting. +## Pandas Integration -## Pandas integration Apache Zeppelin [Table Display System](../usage/display_system/basic.html#table) provides built-in data visualization capabilities. -Python interpreter leverages it to visualize Pandas DataFrames though similar `z.show()` API, same as with [Matplotlib integration](#matplotlib-integration). +Python interpreter leverages it to visualize Pandas DataFrames via `z.show()` API. -Example: +For example: -```python -%python + -import pandas as pd -rates = pd.read_csv("bank.csv", sep=";") -z.show(rates) -``` +By default, `z.show` only display 1000 rows, you can configure `zeppelin.python.maxResult` to adjust the max number of rows. 
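A minimal sketch of such a paragraph, assuming a local `bank.csv` sample file (the same dataset used elsewhere in this page) and the `z` ZeppelinContext variable that Zeppelin injects:

```python
%python

import pandas as pd

# load a sample dataset into a pandas DataFrame ("bank.csv" is an illustrative path)
rates = pd.read_csv("bank.csv", sep=";")

# render the DataFrame with Zeppelin's table display system;
# only the first zeppelin.python.maxResult rows (1000 by default) are shown
z.show(rates)
```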
## SQL over Pandas DataFrames There is a convenience `%python.sql` interpreter that matches Apache Spark experience in Zeppelin and enables usage of SQL language to query [Pandas DataFrames](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) and -visualization of results though built-in [Table Display System](../usage/display_system/basic.html#table). +visualization of results through built-in [Table Display System](../usage/display_system/basic.html#table). +`%python.sql` can access dataframes defined in `%python`. - **Prerequisites** +**Prerequisites** - Pandas `pip install pandas` - PandaSQL `pip install -U pandasql` Here's one example: - - first paragraph +* first paragraph ```python %python - import pandas as pd rates = pd.read_csv("bank.csv", sep=";") ``` - - next paragraph +* next paragraph ```sql %python.sql - SELECT * FROM rates WHERE age < 40 ``` + + + ## Using Zeppelin Dynamic Forms You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/usage/dynamic_form/intro.html) inside your Python code. @@ -357,24 +411,31 @@ Python interpreter create a variable `z` which represent `ZeppelinContext` for y -## Run Python in yarn cluster +## Run Python interpreter in yarn cluster -Zeppelin supports to run python interpreter in yarn cluster which means the python interpreter runs in the yarn container. +Zeppelin supports to [run interpreter in yarn cluster](../quickstart/yarn.html) which means the python interpreter can run in a yarn container. This can achieve better multi-tenant for python interpreter especially when you already have a hadoop yarn cluster. -But there's one critical problem to run python in yarn cluster: how to manage the python environment in yarn container. Because yarn cluster is a distributed cluster environemt -which is composed many nodes, and your python interpreter can start in any node. It is not practical to manage python environment in each nodes. +But there's one critical problem to run python in yarn cluster: how to manage the python environment in yarn container. Because hadoop yarn cluster is a distributed cluster environment +which is composed of many nodes, and your python interpreter can start in any node. It is not practical to manage python environment in each node beforehand. So in order to run python in yarn cluster, we would suggest you to use conda to manage your python environment, and Zeppelin can ship your -codna environment to yarn container, so that each python interpreter can has its own python environment. +conda environment to yarn container, so that each python interpreter can have its own python environment without affecting each other. + +Python interpreter in yarn cluster only works for IPython, so make sure IPython's prerequisites are met. So make sure including the following packages in Step 1. + +* python +* jupyter +* grpcio +* protobuf ### Step 1 -We would suggest you to use conda pack to create archives of conda environments, and ship it to yarn container. Otherwise python interpreter -will use the python executable in PATH of yarn container. +We would suggest you to use [conda-pack](https://conda.github.io/conda-pack/) to create archive of conda environment, and ship it to yarn container. Otherwise python interpreter +will use the python executable file in PATH of yarn container. -Here's one example of yml file which could be used to generate a conda environment with python 3 and some useful python libraries. 
+Here's one example of yaml file which could be used to create a conda environment with python 3 and some useful python libraries. -* Create yml file for conda environment, write the following content into file `env_python_3.yml` +* Create yaml file for conda environment, write the following content into file `python_3_env.yml` ```text name: python_3_env @@ -382,17 +443,15 @@ channels: - conda-forge - defaults dependencies: - - python=3.7 + - python=3.9 + - jupyter + - grpcio + - protobuf - pycodestyle - numpy - pandas - scipy - - grpcio - - protobuf - - pandasql - - ipython - - ipykernel - - jupyter_client + - pandasql - panel - pyyaml - seaborn @@ -407,11 +466,11 @@ dependencies: ``` -* Create conda environment via this yml file using either `conda` or `mamba` +* Create conda environment via this yml file using either [conda](https://docs.conda.io/en/latest/) or [mamba](https://github.com/mamba-org/mamba) ```bash -conda env create -f env_python_3.yml +conda env create -f python_3_env.yml ``` ```bash @@ -420,28 +479,34 @@ mamba env create -f python_3_env ``` -* Pack the conda environment using either `conda` +* Pack the conda environment using `conda` ```bash -conda pack -n python_3 +conda pack -n python_3_env ``` ### Step 2 -Specify the following properties to enable yarn mode for python interpreter, and specify the correct python environment. +Specify the following properties to enable yarn mode for python interpreter. ``` +%python.conf + zeppelin.interpreter.launcher yarn -zeppelin.yarn.dist.archives /home/hadoop/python_3.tar.gz#environment +zeppelin.yarn.dist.archives /home/hadoop/python_3_env.tar.gz#environment zeppelin.interpreter.conda.env.name environment ``` +Setting `zeppelin.interpreter.launcher` as `yarn` will launch python interpreter in yarn cluster. + `zeppelin.yarn.dist.archives` is the python conda environment tar which is created in step 1. This tar will be shipped to yarn container and untar in the working directory of yarn container. -`environment` in `/home/hadoop/python_3.tar.gz#environment` is the folder name after untar. This folder name should be the same as `zeppelin.interpreter.conda.env.name`. +`environment` in `/home/hadoop/python_3.tar.gz#environment` is the folder name after untar. -## Python environments (used for non-yarn mode) +This folder name should be the same as `zeppelin.interpreter.conda.env.name`. Usually we name it as `environment` here. + +## Python environments (used for vanilla python interpreter in non-yarn mode) ### Default By default, PythonInterpreter will use python command defined in `zeppelin.python` property to run python process. @@ -529,14 +594,6 @@ Here is an example %python.docker activate gcr.io/tensorflow/tensorflow:latest ``` -## Technical description - -For in-depth technical details on current implementation please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md). - - -## Some features not yet implemented in the vanilla Python interpreter +## Community -* Interrupt a paragraph execution (`cancel()` method) is currently only supported in Linux and MacOs. -If interpreter runs in another operating system (for instance MS Windows) , interrupt a paragraph will close the whole interpreter. -A JIRA ticket ([ZEPPELIN-893](https://issues.apache.org/jira/browse/ZEPPELIN-893)) is opened to implement this feature in a next release of the interpreter. -* Progression bar in webUI (`getProgress()` method) is currently not implemented. 
+[Join our community](http://zeppelin.apache.org/community.html) to discuss with others. diff --git a/docs/interpreter/r.md b/docs/interpreter/r.md index 2d39126fd4a..221f34e14e1 100644 --- a/docs/interpreter/r.md +++ b/docs/interpreter/r.md @@ -151,6 +151,26 @@ If you want to use R with Spark, it is almost the same via `%spark.r`, `%spark.i +## Play R in Zeppelin docker + +For beginner, we would suggest you to play R in Zeppelin docker first. In the Zeppelin docker image, we have already installed R and lots of useful R libraries including IRKernel's prerequisites, so `%r.ir` is available. + +Without any extra configuration, you can run most of tutorial notes under folder `R Tutorial` directly. + +``` +docker run -u $(id -u) -p 8080:8080 -p:6789:6789 --rm --name zeppelin apache/zeppelin:0.10.0 +``` + +After running the above command, you can open `http://localhost:8080` to play R in Zeppelin. +The port `6789` exposed in the above command is for R shiny app. You need to make the following 2 interpreter properties to enable shiny app accessible as iframe in Zeppelin docker container. + +* `zeppelin.R.shiny.portRange` to be `6789:6789` +* Set `ZEPPELIN_LOCAL_IP` to be `0.0.0.0` + + + + + ## Interpreter binding mode The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same R interpreter. @@ -341,7 +361,7 @@ channels: - conda-forge - defaults dependencies: - - python=3.7 + - python=3.9 - jupyter - grpcio - protobuf diff --git a/docs/interpreter/sap.md b/docs/interpreter/sap.md deleted file mode 100644 index 0cb3a3c905b..00000000000 --- a/docs/interpreter/sap.md +++ /dev/null @@ -1,169 +0,0 @@ ---- - -layout: page - -title: "SAP BusinessObjects Interpreter for Apache Zeppelin" - -description: "SAP BusinessObjects BI platform can simplify the lives of business users and IT staff. SAP BusinessObjects is based on universes. The universe contains dual-semantic layer model. The users make queries upon universes. This interpreter is new interface for universes." - -group: interpreter - ---- - - - -{% include JB/setup %} - -# SAP BusinessObjects (Universe) Interpreter for Apache Zeppelin - -
    - -## Overview - -[SAP BusinessObjects BI platform (universes)](https://help.sap.com/viewer/p/SAP_BUSINESSOBJECTS_BUSINESS_INTELLIGENCE_PLATFORM) can simplify the lives of business users and IT staff. SAP BusinessObjects is based on universes. The universe contains dual-semantic layer model. The users make queries upon universes. This interpreter is new interface for universes. - -*Disclaimer* SAP interpreter is not official interpreter for SAP BusinessObjects BI platform. It uses [BI Semantic Layer REST API](https://help.sap.com/viewer/5431204882b44fc98d56bd752e69f132/4.2.5/en-US/ec54808e6fdb101497906a7cb0e91070.html) - -This interpreter is not directly supported by SAP AG. - -Tested with versions 4.2SP3 (14.2.3.2220) and 4.2SP5. There is no support for filters in UNX-universes converted from old UNV format. - -The universe name must be unique. - -## Configuring SAP Universe Interpreter - -At the "Interpreters" menu, you can edit SAP interpreter or create new one. Zeppelin provides these properties for SAP. - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Property Name | Value | Description
universe.api.url | http://localhost:6405/biprws | The base url for the SAP BusinessObjects BI platform. You have to edit "localhost" that you may use (ex. http://0.0.0.0:6405/biprws)
universe.authType | secEnterprise | The type of authentication for API of Universe. Available values: secEnterprise, secLDAP, secWinAD, secSAPR3
universe.password | | The BI platform user password
universe.user | Administrator | The BI platform user login
    - -![SAP Interpreter Setting]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/sap-interpreter-setting.png) - -### How to use - -
  • Choose the universe -
  • Choose dimensions and measures in `select` statement -
  • Define conditions in `where` statement -You can compare two dimensions/measures or use Filter (without value). -Dimesions/Measures can be compared with static values, may be `is null` or `is not null`, contains or not in list. -Available the nested conditions (using braces "()"). "and" operator have more priority than "or". - - -If generated query contains promtps, then promtps will appear as dynamic form after paragraph submitting. - -#### Example query -```sql -%sap - -universe [Universe Name]; - -select - - [Folder1].[Dimension2], - - [Folder2].[Dimension3], - - [Measure1] - -where - - [Filter1] - - and [Date] > '2018-01-01 00:00:00' - - and [Folder1].[Dimension4] is not null - - and [Folder1].[Dimension5] in ('Value1', 'Value2'); -``` - -### `distinct` keyword -You can write keyword `distinct` after keyword `select` to return only distinct (different) values. - -#### Example query -```sql -%sap -universe [Universe Name]; - -select distinct - [Folder1].[Dimension2], [Measure1] -where - [Filter1]; -``` - -### `limit` keyword -You can write keyword `limit` and limit value in the end of query to limit the number of records returned based on a limit value. - -#### Example query -```sql -%sap -universe [Universe Name]; - -select - [Folder1].[Dimension2], [Measure1] -where - [Filter1] -limit 100; -``` - -## Object Interpolation -The SAP interpreter also supports interpolation of `ZeppelinContext` objects into the paragraph text. -To enable this feature set `universe.interpolation` to `true`. The following example shows one use of this facility: - -####In Scala cell: - -```scala -z.put("curr_date", "2018-01-01 00:00:00") -``` - -####In later SAP cell: - -```sql -where - [Filter1] - and [Date] > '{curr_date}' -``` \ No newline at end of file diff --git a/docs/interpreter/scalding.md b/docs/interpreter/scalding.md deleted file mode 100644 index 02c5fb8b31f..00000000000 --- a/docs/interpreter/scalding.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -layout: page -title: "Scalding Interpreter for Apache Zeppelin" -description: "Scalding is an open source Scala library for writing MapReduce jobs." -group: interpreter ---- - -{% include JB/setup %} - -# Scalding Interpreter for Apache Zeppelin - -
    - -[Scalding](https://github.com/twitter/scalding) is an open source Scala library for writing MapReduce jobs. - -## Building the Scalding Interpreter -You have to first build the Scalding interpreter by enable the **scalding** profile as follows: - -```bash -mvn clean package -Pscalding -DskipTests -``` - -## Enabling the Scalding Interpreter -In a notebook, to enable the **Scalding** interpreter, click on the **Gear** icon,select **Scalding**, and hit **Save**. - -
    - -![Interpreter Binding]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/scalding-InterpreterBinding.png) - -![Interpreter Selection]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/scalding-InterpreterSelection.png) - -
    - -## Configuring the Interpreter - -Scalding interpreter runs in two modes: - -* local -* hdfs - -In the local mode, you can access files on the local server and scalding transformation are done locally. - -In hdfs mode you can access files in HDFS and scalding transformation are run as hadoop map-reduce jobs. - -Zeppelin comes with a pre-configured Scalding interpreter in local mode. - -To run the scalding interpreter in the hdfs mode you have to do the following: - -**Set the classpath with ZEPPELIN\_CLASSPATH\_OVERRIDES** - -In conf/zeppelin_env.sh, you have to set -ZEPPELIN_CLASSPATH_OVERRIDES to the contents of 'hadoop classpath' -and directories with custom jar files you need for your scalding commands. - -**Set arguments to the scalding repl** - -The default arguments are: `--local --repl` - -For hdfs mode you need to add: `--hdfs --repl` - -If you want to add custom jars, you need to add: `-libjars directory/*:directory/*` - -For reducer estimation, you need to add something like: -`-Dscalding.reducer.estimator.classes=com.twitter.scalding.reducer_estimation.InputSizeReducerEstimator` - -**Set max.open.instances** - -If you want to control the maximum number of open interpreters, you have to select "scoped" interpreter for note -option and set `max.open.instances` argument. - -## Testing the Interpreter - -### Local mode - -In example, by using the [Alice in Wonderland](https://gist.github.com/johnynek/a47699caa62f4f38a3e2) tutorial, -we will count words (of course!), and plot a graph of the top 10 words in the book. - -```scala -%scalding - -import scala.io.Source - -// Get the Alice in Wonderland book from gutenberg.org: -val alice = Source.fromURL("http://www.gutenberg.org/files/11/11.txt").getLines -val aliceLineNum = alice.zipWithIndex.toList -val alicePipe = TypedPipe.from(aliceLineNum) - -// Now get a list of words for the book: -val aliceWords = alicePipe.flatMap { case (text, _) => text.split("\\s+").toList } - -// Now lets add a count for each word: -val aliceWithCount = aliceWords.filterNot(_.equals("")).map { word => (word, 1L) } - -// let's sum them for each word: -val wordCount = aliceWithCount.group.sum - -print ("Here are the top 10 words\n") -val top10 = wordCount - .groupAll - .sortBy { case (word, count) => -count } - .take(10) -top10.dump - -``` -``` -%scalding - -val table = "words\t count\n" + top10.toIterator.map{case (k, (word, count)) => s"$word\t$count"}.mkString("\n") -print("%table " + table) - -``` - -If you click on the icon for the pie chart, you should be able to see a chart like this: -![Scalding - Pie - Chart]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/scalding-pie.png) - - -### HDFS mode - -**Test mode** - -``` -%scalding -mode -``` -This command should print: - -``` -res4: com.twitter.scalding.Mode = Hdfs(true,Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml) -``` - - -**Test HDFS read** - -```scala -val testfile = TypedPipe.from(TextLine("/user/x/testfile")) -testfile.dump -``` - -This command should print the contents of the hdfs file /user/x/testfile. - -**Test map-reduce job** - -```scala -val testfile = TypedPipe.from(TextLine("/user/x/testfile")) -val a = testfile.groupAll.size.values -a.toList - -``` - -This command should create a map reduce job. 
- -## Future Work -* Better user feedback (hadoop url, progress updates) -* Ability to cancel jobs -* Ability to dynamically load jars without restarting the interpreter -* Multiuser scalability (run scalding interpreters on different servers) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md deleted file mode 100644 index cb8d1278ec0..00000000000 --- a/docs/interpreter/scio.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -layout: page -title: "Scio Interpreter for Apache Zeppelin" -description: "Scio is a Scala DSL for Apache Beam/Google Dataflow model." -group: interpreter ---- - -{% include JB/setup %} - -# Scio Interpreter for Apache Zeppelin - -
    - -## Overview -Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/DataflowJavaSDK) and [Apache Beam](http://beam.incubator.apache.org/) inspired by [Spark](http://spark.apache.org/) and [Scalding](https://github.com/twitter/scalding). See the current [wiki](https://github.com/spotify/scio/wiki) and [API documentation](http://spotify.github.io/scio/) for more information. - -## Configuration - - - - - - - - - - - - - - - - - -
Name | Default Value | Description
zeppelin.scio.argz | --runner=InProcessPipelineRunner | Scio interpreter wide arguments. Documentation: https://github.com/spotify/scio/wiki#options and https://cloud.google.com/dataflow/pipelines/specifying-exec-params
zeppelin.scio.maxResult | 1000 | Max number of SCollection results to display
    - -## Enabling the Scio Interpreter - -In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **beam** (**beam.scio**). - -## Using the Scio Interpreter - -In a paragraph, use `%beam.scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. - -```scala -%beam.scio -val (sc, args) = ContextAndArgs(argz) -``` - -Use `sc` context the way you would in a regular pipeline/REPL. - -Example: - -```scala -%beam.scio -val (sc, args) = ContextAndArgs(argz) -sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() -``` - -If you close Scio context, go ahead an create a new one using `ContextAndArgs`. Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. You can close Scio context much the same way as in Scio REPL, and use Zeppelin display helpers to synchronously close and display results - read more below. - -### Progress - -There can be only one paragraph running at once. There is no notion of overall progress, thus progress bar will show `0`. - -### SCollection display helpers - -Scio interpreter comes with display helpers to ease working with Zeppelin notebooks. Simply use `closeAndDisplay()` on `SCollection` to close context and display the results. The number of results is limited by `zeppelin.scio.maxResult` (by default 1000). - -Supported `SCollection` types: - - * Scio's typed BigQuery - * Scala's Products (case classes, tuples) - * Google BigQuery's TableRow - * Apache Avro - * All Scala's `AnyVal` - -#### Helper methods - -There are different helper methods for different objects. You can easily display results from `SCollection`, `Future[Tap]` and `Tap`. - -##### `SCollection` helper - -`SCollection` has `closeAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously close Scio context, and once available pull and display results. - -##### `Future[Tap]` helper - -`Future[Tap]` has `waitAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously wait for results, and once available pull and display results. - -##### `Tap` helper - -`Tap` has `display` Zeppelin helper method for types listed above. Use it to pull and display results. 
- -### Examples - -#### BigQuery example: - -```scala -%beam.scio -@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays - |FROM [bigquery-samples:airline_ontime_data.flights] - |group by departure_airport - |order by 2 desc - |limit 10""".stripMargin) class Flights - -val (sc, args) = ContextAndArgs(argz) -sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema) -``` - -#### BigQuery typed example: - -```scala -%beam.scio -@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays - |FROM [bigquery-samples:airline_ontime_data.flights] - |group by departure_airport - |order by 2 desc - |limit 10""".stripMargin) class Flights - -val (sc, args) = ContextAndArgs(argz) -sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay() -``` - -#### Avro example: - -```scala -%beam.scio -import com.spotify.data.ExampleAvro - -val (sc, args) = ContextAndArgs(argz) -sc.avroFile[ExampleAvro]("gs:///tmp/my.avro").take(10).closeAndDisplay() -``` - -#### Avro example with a view schema: - -```scala -%beam.scio -import com.spotify.data.ExampleAvro -import org.apache.avro.Schema - -val (sc, args) = ContextAndArgs(argz) -val view = Schema.parse("""{"type":"record","name":"ExampleAvro","namespace":"com.spotify.data","fields":[{"name":"track","type":"string"}, {"name":"artist", "type":"string"}]}""") - -sc.avroFile[EndSongCleaned]("gs:///tmp/my.avro").take(10).closeAndDisplay(view) -``` - -### Google credentials - -Scio Interpreter will try to infer your Google Cloud credentials from its environment, it will take into the account: - - * `argz` interpreter settings ([doc](https://github.com/spotify/scio/wiki#options)) - * environment variable (`GOOGLE_APPLICATION_CREDENTIALS`) - * gcloud configuration - -#### BigQuery macro credentials - -Currently BigQuery project for macro expansion is inferred using Google Dataflow's [DefaultProjectFactory().create()](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java#L187) diff --git a/docs/interpreter/shell.md b/docs/interpreter/shell.md index 631c1337354..865b9150fb4 100644 --- a/docs/interpreter/shell.md +++ b/docs/interpreter/shell.md @@ -79,6 +79,11 @@ At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property v Internal and external IP mapping of zeppelin server + + zeppelin.concurrency.max + 10 + Max concurrency of shell interpreter + ## Example diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index fd0356ded89..680ca054b3b 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -26,7 +26,7 @@ limitations under the License. ## Overview [Apache Spark](http://spark.apache.org) is a fast and general-purpose cluster computing system. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. -Apache Spark is supported in Zeppelin with Spark interpreter group which consists of below six interpreters. +Apache Spark is supported in Zeppelin with Spark interpreter group which consists of following interpreters. @@ -52,20 +52,88 @@ Apache Spark is supported in Zeppelin with Spark interpreter group which consist - + + + + + + + + + + + +
    %spark.r SparkRInterpreterProvides an R environment with SparkR supportProvides an vanilla R environment with SparkR support
%spark.ir | SparkIRInterpreter | Provides an R environment with SparkR support based on Jupyter IRKernel
%spark.shiny | SparkShinyInterpreter | Used to create R shiny app with SparkR support
    %spark.sql SparkSQLInterpreter Provides a SQL environment
    + +## Main Features + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + +
Feature | Description
Support multiple versions of Spark | You can run different versions of Spark in one Zeppelin instance
Support multiple versions of Scala | You can run different Scala versions (2.12/2.13) of Spark in one Zeppelin instance
Support multiple languages | Scala, SQL, Python and R are supported. Besides that, you can also collaborate across languages, e.g. write a Scala UDF and use it in PySpark
Support multiple execution modes | Local, Standalone, Yarn and K8s
Interactive development | The interactive development user experience increases your productivity
%spark.kotlin | KotlinSparkInterpreter | Provides a Kotlin environment
Inline Visualization | You can visualize Spark Dataset/DataFrame via Python/R's plotting libraries, and you can even make a SparkR Shiny app in Zeppelin
Multi-tenancy | Multiple users can work in one Zeppelin instance without affecting each other.
Rest API Support | You can not only submit Spark jobs via the Zeppelin notebook UI, but also via its REST API (you can use Zeppelin as a Spark job server).
    +## Play Spark in Zeppelin docker + +For beginner, we would suggest you to play Spark in Zeppelin docker. +In the Zeppelin docker image, we have already installed +miniconda and lots of [useful python and R libraries](https://github.com/apache/zeppelin/blob/branch-0.10/scripts/docker/zeppelin/bin/env_python_3_with_R.yml) +including IPython and IRkernel prerequisites, so `%spark.pyspark` would use IPython and `%spark.ir` is enabled. +Without any extra configuration, you can run most of tutorial notes under folder `Spark Tutorial` directly. + +First you need to download Spark, because there's no Spark binary distribution shipped with Zeppelin. +e.g. Here we download Spark 3.1.2 to`/mnt/disk1/spark-3.1.2`, +and we mount it to Zeppelin docker container and run the following command to start Zeppelin docker container. + +```bash +docker run -u $(id -u) -p 8080:8080 -p 4040:4040 --rm -v /mnt/disk1/spark-3.1.2:/opt/spark -e SPARK_HOME=/opt/spark --name zeppelin apache/zeppelin:0.10.0 +``` + +After running the above command, you can open `http://localhost:8080` to play Spark in Zeppelin. We only verify the spark local mode in Zeppelin docker, other modes may not work due to network issues. +`-p 4040:4040` is to expose Spark web ui, so that you can access Spark web ui via `http://localhost:8081`. + ## Configuration The Spark interpreter can be configured with properties provided by Zeppelin. You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to [Spark Available Properties](http://spark.apache.org/docs/latest/configuration.html#available-properties). @@ -201,40 +269,35 @@ You can also set other Spark properties which are not listed in the table. For a Overrides Spark UI default URL. Value should be a full URL (ex: http://{hostName}/{uniquePath}. - In Kubernetes mode, value can be Jinja template string with 3 template variables 'PORT', 'SERVICE_NAME' and 'SERVICE_DOMAIN'. - (ex: http://{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}}) + In Kubernetes mode, value can be Jinja template string with 3 template variables PORT, {% raw %} SERVICE_NAME {% endraw %} and {% raw %} SERVICE_DOMAIN {% endraw %}. + (e.g.: {% raw %}http://{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}} {% endraw %}). In yarn mode, value could be a knox url with {% raw %} {{applicationId}} {% endraw %} as placeholder, + (e.g.: {% raw %}https://knox-server:8443/gateway/yarnui/yarn/proxy/{{applicationId}}/{% endraw %}) spark.webui.yarn.useProxy false - whether use yarn proxy url as spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004 - - - spark.repl.target - jvm-1.6 - - Manually specifying the Java version of Spark Interpreter Scala REPL,Available options:
    - scala-compile v2.10.7 to v2.11.12 supports "jvm-1.5, jvm-1.6, jvm-1.7 and jvm-1.8", and the default value is jvm-1.6.
    - scala-compile v2.10.1 to v2.10.6 supports "jvm-1.5, jvm-1.6, jvm-1.7", and the default value is jvm-1.6.
    - scala-compile v2.12.x defaults to jvm-1.8, and only supports jvm-1.8. - + whether use yarn proxy url as Spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004 Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps. -### Export SPARK_HOME +* Set SPARK_HOME +* Set master + + +### Set SPARK_HOME There are several options for setting `SPARK_HOME`. * Set `SPARK_HOME` in `zeppelin-env.sh` -* Set `SPARK_HOME` in Interpreter setting page +* Set `SPARK_HOME` in interpreter setting page * Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter) -#### 1. Set `SPARK_HOME` in `zeppelin-env.sh` +#### Set `SPARK_HOME` in `zeppelin-env.sh` -If you work with only one version of spark, then you can set `SPARK_HOME` in `zeppelin-env.sh` because any setting in `zeppelin-env.sh` is globally applied. +If you work with only one version of Spark, then you can set `SPARK_HOME` in `zeppelin-env.sh` because any setting in `zeppelin-env.sh` is globally applied. e.g. @@ -251,21 +314,14 @@ export HADOOP_CONF_DIR=/usr/lib/hadoop ``` -#### 2. Set `SPARK_HOME` in Interpreter setting page +#### Set `SPARK_HOME` in interpreter setting page -If you want to use multiple versions of spark, then you need create multiple spark interpreters and set `SPARK_HOME` for each of them. e.g. -Create a new spark interpreter `spark24` for spark 2.4 and set `SPARK_HOME` in interpreter setting page -
    - -
    - -Create a new spark interpreter `spark16` for spark 1.6 and set `SPARK_HOME` in interpreter setting page -
    - -
    +If you want to use multiple versions of Spark, then you need to create multiple Spark interpreters and set `SPARK_HOME` separately. e.g. +Create a new Spark interpreter `spark33` for Spark 3.3 and set its `SPARK_HOME` in interpreter setting page, +Create a new Spark interpreter `spark34` for Spark 3.4 and set its `SPARK_HOME` in interpreter setting page. -#### 3. Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter) +#### Set `SPARK_HOME` via [inline generic configuration](../usage/interpreter/overview.html#inline-generic-confinterpreter) Besides setting `SPARK_HOME` in interpreter setting page, you can also use inline generic configuration to put the configuration with code together for more flexibility. e.g. @@ -273,23 +329,26 @@ configuration with code together for more flexibility. e.g. -### Set master in Interpreter menu -After starting Zeppelin, go to **Interpreter** menu and edit **spark.master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type. +### Set master + +After setting `SPARK_HOME`, you need to set **spark.master** property in either interpreter setting page or inline configuartion. The value may vary depending on your Spark cluster deployment type. For example, * **local[*]** in local mode * **spark://master:7077** in standalone cluster - * **yarn-client** in Yarn client mode (Not supported in spark 3.x, refer below for how to configure yarn-client in Spark 3.x) - * **yarn-cluster** in Yarn cluster mode (Not supported in spark 3.x, refer below for how to configure yarn-client in Spark 3.x) + * **yarn-client** in Yarn client mode (Not supported in Spark 3.x, refer below for how to configure yarn-client in Spark 3.x) + * **yarn-cluster** in Yarn cluster mode (Not supported in Spark 3.x, refer below for how to configure yarn-cluster in Spark 3.x) * **mesos://host:5050** in Mesos cluster That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way. For the further information about Spark & Zeppelin version compatibility, please refer to "Available Interpreters" section in [Zeppelin download page](https://zeppelin.apache.org/download.html). -> Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile. +Note that without setting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile. And this included version Spark has limited function, so it +is always recommended to set `SPARK_HOME`. -> Yarn client mode and local mode will run driver in the same machine with zeppelin server, this would be dangerous for production. Because it may run out of memory when there's many spark interpreters running at the same time. So we suggest you only allow yarn-cluster mode via setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`. +Yarn client mode and local mode will run driver in the same machine with zeppelin server, this would be dangerous for production. Because it may run out of memory when there's many Spark interpreters running at the same time. So we suggest you +only allow yarn-cluster mode via setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`. 
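Putting these together, a first-paragraph inline configuration could look roughly like the following sketch (the SPARK_HOME path and resource settings are placeholders; adjust them to your environment):

```
%spark.conf

SPARK_HOME              /opt/spark-3.3.0
spark.master            yarn
spark.submit.deployMode cluster
spark.executor.memory   4g
```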
#### Configure yarn mode for Spark 3.x @@ -314,77 +373,55 @@ Specifying `yarn-client` & `yarn-cluster` in `spark.master` is not supported in -## SparkContext, SQLContext, SparkSession, ZeppelinContext +## Interpreter binding mode -SparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext`, `spark` and `z`, respectively, in Scala, Kotlin, Python and R environments. +The default [interpreter binding mode](../usage/interpreter/interpreter_binding_mode.html) is `globally shared`. That means all notes share the same Spark interpreter. +So we recommend you to use `isolated per note` which means each note has own Spark interpreter without affecting each other. But it may run out of your machine resource if too many +Spark interpreters are created, so we recommend to always use yarn-cluster mode in production if you run Spark in hadoop cluster. And you can use [inline configuration](../usage/interpreter/overview.html#inline-generic-configuration) via `%spark.conf` in the first paragraph to customize your spark configuration. -> Note that Scala/Python/R environment shares the same SparkContext, SQLContext, SparkSession and ZeppelinContext instance. +You can also choose `scoped` mode. For `scoped` per note mode, Zeppelin creates separated scala compiler/python shell for each note but share a single `SparkContext/SqlContext/SparkSession`. -## YARN Mode -Zeppelin support both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify `SPARK_HOME` & `HADOOP_CONF_DIR`. -Usually you only have one hadoop cluster, so you can set `HADOOP_CONF_DIR` in `zeppelin-env.sh` which is applied to all spark interpreters. If you want to use spark against multiple hadoop cluster, then you need to define -`HADOOP_CONF_DIR` in interpreter setting or via inline generic configuration. -## Dependency Management +## SparkContext, SQLContext, SparkSession, ZeppelinContext -For spark interpreter, it is not recommended to use Zeppelin's [Dependency Management](../usage/interpreter/dependency_management.html) for managing -third party dependencies (`%spark.dep` is removed from Zeppelin 0.9 as well). Instead you should set the standard Spark properties. +SparkContext, SparkSession and ZeppelinContext are automatically created and exposed as variable names `sc`, `spark` and `z` respectively, in Scala, Python and R environments. - - - - - - - - - - - - - - - - - - - - - -
Spark Property | Spark Submit Argument | Description
spark.files | --files | Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed.
spark.jars | --jars | Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.
spark.jars.packages | --packages | Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories.
    -You can either set Spark properties in interpreter setting page or set Spark submit arguments in `zeppelin-env.sh` via environment variable `SPARK_SUBMIT_OPTIONS`. -For examples: +> Note that Scala/Python/R environment shares the same SparkContext, SQLContext, SparkSession and ZeppelinContext instance. -```bash -export SPARK_SUBMIT_OPTIONS="--files --jars --packages " -``` +## Yarn Mode + +Zeppelin support both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify `SPARK_HOME` & `HADOOP_CONF_DIR`. +Usually you only have one hadoop cluster, so you can set `HADOOP_CONF_DIR` in `zeppelin-env.sh` which is applied to all Spark interpreters. If you want to use spark against multiple hadoop cluster, then you need to define +`HADOOP_CONF_DIR` in interpreter setting or via inline generic configuration. + +## K8s Mode -But it is not recommended to set them in `SPARK_SUBMIT_OPTIONS`. Because it will be shared by all spark interpreters, which means you can not set different dependencies for different users. +Regarding how to run Spark on K8s in Zeppelin, please check [this doc](../quickstart/kubernetes.html). ## PySpark -There're 2 ways to use PySpark in Zeppelin: +There are 2 ways to use PySpark in Zeppelin: * Vanilla PySpark * IPySpark ### Vanilla PySpark (Not Recommended) -Vanilla PySpark interpreter is almost the same as vanilla Python interpreter except Zeppelin inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`. -By default, Zeppelin would use IPython in `%spark.pyspark` when IPython is available, Otherwise it would fall back to the original PySpark implementation. -If you don't want to use IPython, then you can set `zeppelin.pyspark.useIPython` as `false` in interpreter setting. For the IPython features, you can refer doc -[Python Interpreter](python.html) +Vanilla PySpark interpreter is almost the same as vanilla Python interpreter except Spark interpreter inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`. + +By default, Zeppelin would use IPython in `%spark.pyspark` when IPython is available (Zeppelin would check whether ipython's prerequisites are met), Otherwise it would fall back to the vanilla PySpark implementation. ### IPySpark (Recommended) -You can use `IPySpark` explicitly via `%spark.ipyspark`. IPySpark interpreter is almost the same as IPython interpreter except Zeppelin inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`. -For the IPython features, you can refer doc [Python Interpreter](python.html) + +You can use `IPySpark` explicitly via `%spark.ipyspark`. IPySpark interpreter is almost the same as IPython interpreter except Spark interpreter inject SparkContext, SQLContext, SparkSession via variables `sc`, `sqlContext`, `spark`. +For the IPython features, you can refer doc [Python Interpreter](python.html#ipython-interpreter-pythonipython-recommended) ## SparkR -Zeppelin support SparkR via `%spark.r`. Here's configuration for SparkR Interpreter. +Zeppelin support SparkR via `%spark.r`, `%spark.ir` and `%spark.shiny`. Here's configuration for SparkR Interpreter. @@ -412,12 +449,28 @@ Zeppelin support SparkR via `%spark.r`. Here's configuration for SparkR Interpre + + + + + + + + + + + + + + +
    out.format = 'html', comment = NA, echo = FALSE, results = 'asis', message = F, warning = F, fig.retina = 2 R plotting options.
    zeppelin.R.shiny.iframe_width100%IFrame width of Shiny App
    zeppelin.R.shiny.iframe_height500pxIFrame height of Shiny App
    zeppelin.R.shiny.portRange:Shiny app would launch a web app at some port, this property is to specify the portRange via format ':', e.g. '5000:5001'. By default it is ':' which means any port
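As a quick, illustrative sketch of the `%spark.r` usage described in this section (not part of the original page; `faithful` is a dataset that ships with R and `createDataFrame` is the standard SparkR function):

```
%spark.r
# SparkR is already initialized by the Spark interpreter,
# so no extra setup is needed before using SparkR functions.
df <- createDataFrame(faithful)
head(df)
```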
+Refer to the [R doc](r.html) for how to use R in Zeppelin.

## SparkSql

-Spark Sql Interpreter share the same SparkContext/SparkSession with other Spark interpreter. That means any table registered in scala, python or r code can be accessed by Spark Sql.
+The Spark sql interpreter shares the same SparkContext/SparkSession with other Spark interpreters. That means any table registered in scala, python or r code can be accessed by Spark sql.
For examples:
```scala
@@ -435,11 +488,13 @@ df.createOrReplaceTempView("people")
select * from people
```
-By default, each sql statement would run sequentially in `%spark.sql`. But you can run them concurrently by following setup.
+You can write multiple sql statements in one paragraph. Each sql statement is separated by a semicolon.
+Sql statements in one paragraph run sequentially.
+But sql statements in different paragraphs can run concurrently with the following configuration.
-1. Set `zeppelin.spark.concurrentSQL` to true to enable the sql concurrent feature, underneath zeppelin will change to use fairscheduler for spark. And also set `zeppelin.spark.concurrentSQL.max` to control the max number of sql statements running concurrently.
+1. Set `zeppelin.spark.concurrentSQL` to true to enable the concurrent sql feature; underneath, Zeppelin will switch to Spark's fairscheduler. Also set `zeppelin.spark.concurrentSQL.max` to control the max number of sql statements running concurrently.
2. Configure pools by creating `fairscheduler.xml` under your `SPARK_CONF_DIR`, check the official spark doc [Configuring Pool Properties](http://spark.apache.org/docs/latest/job-scheduling.html#configuring-pool-properties)
-3. Set pool property via setting paragraph property. e.g.
+3. Set the pool property via a paragraph local property. e.g.
```
%spark(pool=pool1)
@@ -448,25 +503,61 @@ By default, each sql statement would run sequentially in `%spark.sql`. But you c
```
This pool feature is also available for all versions of scala Spark, PySpark. For SparkR, it is only available starting from 2.3.0.
-
-## Interpreter Setting Option
-You can choose one of `shared`, `scoped` and `isolated` options when you configure Spark interpreter.
-e.g.
+## Dependency Management
+
+For the Spark interpreter, it is not recommended to use Zeppelin's [Dependency Management](../usage/interpreter/dependency_management.html) for managing
+third party dependencies (`%spark.dep` is removed from Zeppelin 0.9 as well). Instead, you should set the standard Spark properties as follows:
    Spark PropertySpark Submit ArgumentDescription
    spark.files--filesComma-separated list of files to be placed in the working directory of each executor. Globs are allowed.
    spark.jars--jarsComma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.
    spark.jars.packages--packagesComma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories.
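For illustration, these properties can also be set through Zeppelin's generic inline configuration, in a `%spark.conf` paragraph that runs before the Spark context is created. This is only a sketch; the Maven coordinate and file path below are placeholders, not values from the original page:

```
%spark.conf

# Run this paragraph before any other Spark paragraph in the note.
# The coordinate and path below are illustrative placeholders.
spark.jars.packages   com.example:example-lib:1.0.0
spark.files           /path/to/my_settings.conf
```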
+
+As general Spark properties, you can set them via inline configuration, the interpreter setting page, or in `zeppelin-env.sh` via the environment variable `SPARK_SUBMIT_OPTIONS`.
+For example:
+
+```bash
+export SPARK_SUBMIT_OPTIONS="--files --jars --packages "
+```
+
+Note that `SPARK_SUBMIT_OPTIONS` is deprecated and will be removed in a future release.

-* In `scoped` per user mode, Zeppelin creates separated Scala compiler for each user but share a single SparkContext.
-* In `isolated` per user mode, Zeppelin creates separated SparkContext for each user.

## ZeppelinContext
+
Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities.
-See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details.
+See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. For the Spark interpreter, you can use `z` to display a Spark `Dataset/Dataframe`.
+
+
+
+

## Setting up Zeppelin with Kerberos
+
Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:

-There're several ways to make spark work with kerberos enabled hadoop cluster in Zeppelin.
+There are several ways to make Spark work with a kerberos-enabled hadoop cluster in Zeppelin.

1. Share one single hadoop cluster.
In this case you just need to specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` in zeppelin-site.xml, Spark interpreter will use these setting by default.

@@ -474,11 +565,26 @@ In this case you just need to specify `zeppelin.server.kerberos.keytab` and `zep

2. Work with multiple hadoop clusters.
In this case you can specify `spark.yarn.keytab` and `spark.yarn.principal` to override `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal`.

+### Configuration Setup
+
+1. On the server where Zeppelin is installed, install the Kerberos client modules and configuration, krb5.conf.
+   This is to make the server communicate with the KDC.
+
+2. Add the two properties below to the Spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`):
+
+   ```
+   spark.yarn.principal
+   spark.yarn.keytab
+   ```
+
+> **NOTE:** If you do not have permission to access the above spark-defaults.conf file, you can optionally add the above lines to the Spark interpreter setting through the Interpreter tab in the Zeppelin UI.
+
+3. That's it. Play with Zeppelin!

## User Impersonation

-In yarn mode, the user who launch the zeppelin server will be used to launch the spark yarn application. This is not a good practise.
-Most of time, you will enable shiro in Zeppelin and would like to use the login user to submit the spark yarn app. For this purpose,
+In yarn mode, the user who launches the zeppelin server will be used to launch the Spark yarn application. This is not a good practice.
+Most of the time, you will enable shiro in Zeppelin and would like to use the login user to submit the Spark yarn app. For this purpose,
you need to enable user impersonation for more security control. In order the enable user impersonation, you need to do the following steps

**Step 1** Enable user impersonation setting hadoop's `core-site.xml`. E.g. if you are using user `zeppelin` to launch Zeppelin, then add the following to `core-site.xml`, then restart both hdfs and yarn.

@@ -500,27 +606,6 @@ you need to enable user impersonation for more security control.
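Returning to the `ZeppelinContext` section above: a minimal sketch of displaying a DataFrame with `z` could look like the following (illustrative only; `df` is assumed to have been created in an earlier paragraph):

```
%spark
// Render a Dataset/DataFrame as an interactive Zeppelin table.
// `df` is assumed to exist from a previous paragraph of the note.
z.show(df)
```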
In order the en **Step 3(Optional)** If you are using kerberos cluster, then you need to set `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` to the user(aka. user in Step 1) you want to impersonate in `zeppelin-site.xml`. +## Community - -## Deprecate Spark 2.2 and earlier versions -Starting from 0.9, Zeppelin deprecate Spark 2.2 and earlier versions. So you will see a warning message when you use Spark 2.2 and earlier. -You can get rid of this message by setting `zeppelin.spark.deprecatedMsg.show` to `false`. - - - -### Configuration Setup - -1. On the server that Zeppelin is installed, install Kerberos client modules and configuration, krb5.conf. -This is to make the server communicate with KDC. - -2. Add the two properties below to Spark configuration (`[SPARK_HOME]/conf/spark-defaults.conf`): - - ``` - spark.yarn.principal - spark.yarn.keytab - ``` - - > **NOTE:** If you do not have permission to access for the above spark-defaults.conf file, optionally, you can add the above lines to the Spark Interpreter setting through the Interpreter tab in the Zeppelin UI. - -3. That's it. Play with Zeppelin! - +[Join our community](http://zeppelin.apache.org/community.html) to discuss with others. diff --git a/docs/interpreter/submarine.md b/docs/interpreter/submarine.md deleted file mode 100644 index 97be1bb7e02..00000000000 --- a/docs/interpreter/submarine.md +++ /dev/null @@ -1,407 +0,0 @@ ---- -layout: page -title: "Apache Hadoop Submarine Interpreter for Apache Zeppelin" -description: "Hadoop Submarine is the latest machine learning framework subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, MXNet, Caffe, Spark, etc." -group: interpreter ---- - -{% include JB/setup %} - -# Submarine Interpreter for Apache Zeppelin - -
    - -[Hadoop Submarine ](https://hadoop.apache.org/submarine/) is the latest machine learning framework subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, MXNet, Caffe, Spark, etc. A variety of deep learning frameworks provide a full-featured system framework for machine learning algorithm development, distributed model training, model management, and model publishing, combined with hadoop's intrinsic data storage and data processing capabilities to enable data scientists to Good mining and the value of the data. - -A deep learning algorithm project requires data acquisition, data processing, data cleaning, interactive visual programming adjustment parameters, algorithm testing, algorithm publishing, algorithm job scheduling, offline model training, model online services and many other processes and processes. Zeppelin is a web-based notebook that supports interactive data analysis. You can use SQL, Scala, Python, etc. to make data-driven, interactive, collaborative documents. - -You can use the more than 20 interpreters in zeppelin (for example: spark, hive, Cassandra, Elasticsearch, Kylin, HBase, etc.) to collect data, clean data, feature extraction, etc. in the data in Hadoop before completing the machine learning model training. The data preprocessing process. - -By integrating submarine in zeppelin, we use zeppelin's data discovery, data analysis and data visualization and collaboration capabilities to visualize the results of algorithm development and parameter adjustment during machine learning model training. - -## Architecture - - - -As shown in the figure above, how the Submarine develops and models the machine learning algorithms through Zeppelin is explained from the system architecture. - -After installing and deploying Hadoop 3.1+ and Zeppelin, submarine will create a fully separate Zeppelin Submarine interpreter Docker container for each user in YARN. This container contains the development and runtime environment for Tensorflow. Zeppelin Server connects to the Zeppelin Submarine interpreter Docker container in YARN. allows algorithmic engineers to perform algorithm development and data visualization in Tensorflow's stand-alone environment in Zeppelin Notebook. - -After the algorithm is developed, the algorithm engineer can submit the algorithm directly to the YARN in offline transfer training in Zeppelin, real-time demonstration of model training with Submarine's TensorBoard for each algorithm engineer. - -You can not only complete the model training of the algorithm, but you can also use the more than twenty interpreters in Zeppelin. Complete the data preprocessing of the model, For example, you can perform data extraction, filtering, and feature extraction through the Spark interpreter in Zeppelin in the Algorithm Note. - -In the future, you can also use Zeppelin's upcoming Workflow workflow orchestration service. You can complete Spark, Hive data processing and Tensorflow model training in one Note. It is organized into a workflow through visualization, etc., and the scheduling of jobs is performed in the production environment. - -## Overview - - - -As shown in the figure above, from the internal implementation, how Submarine combines Zeppelin's machine learning algorithm development and model training. - -1. The algorithm engineer created a Tensorflow notebook (left image) in Zeppelin by using Submarine interpreter. - - It is important to note that you need to complete the development of the entire algorithm in a Note. - -2. 
You can use Spark for data preprocessing in some of the paragraphs in Note. - -3. Use Python for algorithm development and debugging of Tensorflow in other paragraphs of notebook, Submarine creates a Zeppelin Submarine Interpreter Docker Container for you in YARN, which contains the following features and services: - - + **Shell Command line tool**:Allows you to view the system environment in the Zeppelin Submarine Interpreter Docker Container, Install the extension tools you need or the Python dependencies. - + **Kerberos lib**:Allows you to perform kerberos authentication and access to Hadoop clusters with Kerberos authentication enabled. - + **Tensorflow environment**:Allows you to develop tensorflow algorithm code. - + **Python environment**:Allows you to develop tensorflow code. - + Complete a complete algorithm development with a Note in Zeppelin. If this algorithm contains multiple modules, You can write different algorithm modules in multiple paragraphs in Note. The title of each paragraph is the name of the algorithm module. The content of the paragraph is the code content of this algorithm module. - + **HDFS Client**:Zeppelin Submarine Interpreter will automatically submit the algorithm code you wrote in Note to HDFS. - - **Submarine interpreter Docker Image** It is Submarine that provides you with an image file that supports Tensorflow (CPU and GPU versions). -And installed the algorithm library commonly used by Python. -You can also install other development dependencies you need on top of the base image provided by Submarine. - -4. When you complete the development of the algorithm module, You can do this by creating a new paragraph in Note and typing `%submarine dashboard`. Zeppelin will create a Submarine Dashboard. The machine learning algorithm written in this Note can be submitted to YARN as a JOB by selecting the `JOB RUN` command option in the Control Panel. Create a Tensorflow Model Training Docker Container, The container contains the following sections: - - + Tensorflow environment - + HDFS Client Will automatically download the algorithm file Mount from HDFS into the container for distributed model training. Mount the algorithm file to the Work Dir path of the container. - - **Submarine Tensorflow Docker Image** There is Submarine that provides you with an image file that supports Tensorflow (CPU and GPU versions). And installed the algorithm library commonly used by Python. You can also install other development dependencies you need on top of the base image provided by Submarine. - - - - - - - - - - - - - - - - - - - - - - -
    NameClassDescription
    %submarineSubmarineInterpreterProvides interpreter for Apache Submarine dashboard
    %submarine.shSubmarineShellInterpreterProvides interpreter for Apache Submarine shell
    %submarine.pythonPySubmarineInterpreterProvides interpreter for Apache Submarine python
    - -### Submarine shell - -After creating a Note with Submarine Interpreter in Zeppelin, You can add a paragraph to Note if you need it. Using the %submarine.sh identifier, you can use the Shell command to perform various operations on the Submarine Interpreter Docker Container, such as: - -1. View the Pythone version in the Container -2. View the system environment of the Container -3. Install the dependencies you need yourself -4. Kerberos certification with kinit -5. Use Hadoop in Container for HDFS operations, etc. - -### Submarine python - -You can add one or more paragraphs to Note. Write the algorithm module for Tensorflow in Python using the `%submarine.python` identifier. - -### Submarine Dashboard - -After writing the Tensorflow algorithm by using `%submarine.python`, You can add a paragraph to Note. Enter the %submarine dashboard and execute it. Zeppelin will create a Submarine Dashboard. - - - -With Submarine Dashboard you can do all the operational control of Submarine, for example: - -1. **Usage**:Display Submarine's command description to help developers locate problems. - -2. **Refresh**:Zeppelin will erase all your input in the Dashboard. - -3. **Tensorboard**:You will be redirected to the Tensorboard WEB system created by Submarine for each user. With Tensorboard you can view the real-time status of the Tensorflow model training in real time. - -4. **Command** - - + **JOB RUN**:Selecting `JOB RUN` will display the parameter input interface for submitting JOB. - - - - - - - - - - - - - - - - - - - - - - -
    NameDescription
    Checkpoint Path/td> - Submarine sets up a separate Checkpoint path for each user's Note for Tensorflow training. Saved the training data for this Note history, Used to train the output of model data, Tensorboard uses the data in this path for model presentation. Users cannot modify it. For example: `hdfs://cluster1/...` , The environment variable name for Checkpoint Path is `%checkpoint_path%`, You can use `%checkpoint_path%` instead of the input value in Data Path in `PS Launch Cmd` and `Worker Launch Cmd`.
    Input PathThe user specifies the data data directory of the Tensorflow algorithm. Only HDFS-enabled directories are supported. The environment variable name for Data Path is `%input_path%`, You can use `%input_path%` instead of the input value in Data Path in `PS Launch Cmd` and `Worker Launch Cmd`.
    PS Launch CmdTensorflow Parameter services launch command,例如:`python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --num-gpus=0 ...`
    Worker Launch CmdTensorflow Worker services launch command,例如:`python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --num-gpus=1 ...`
    - - + **JOB STOP** - - You can choose to execute the `JOB STOP` command. Stop a Tensorflow model training task that has been submitted and is running - - + **TENSORBOARD START** - - You can choose to execute the `TENSORBOARD START` command to create your TENSORBOARD Docker Container. - - + **TENSORBOARD STOP** - - You can choose to execute the `TENSORBOARD STOP` command to stop and destroy your TENSORBOARD Docker Container. - -5. **Run Command**:Execute the action command of your choice -6. **Clean Chechkpoint**:Checking this option will clear the data in this Note's Checkpoint Path before each `JOB RUN` execution. - -### Configuration - -Zeppelin Submarine interpreter provides the following properties to customize the Submarine interpreter - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Attribute nameAttribute valueDescription
    DOCKER_CONTAINER_TIME_ZONEEtc/UTCSet the time zone in the container | -
    DOCKER_HADOOP_HDFS_HOME/hadoop-3.1-0Hadoop path in the following 3 images(SUBMARINE_INTERPRETER_DOCKER_IMAGE、tf.parameter.services.docker.image、tf.worker.services.docker.image) | -
    DOCKER_JAVA_HOME/opt/javaJAVA path in the following 3 images(SUBMARINE_INTERPRETER_DOCKER_IMAGE、tf.parameter.services.docker.image、tf.worker.services.docker.image) | -
    HADOOP_YARN_SUBMARINE_JARPath to the Submarine JAR package in the Hadoop-3.1+ release installed on the Zeppelin server | -
    INTERPRETER_LAUNCH_MODElocal/yarnRun the Submarine interpreter instance in local or YARN local mainly for submarine interpreter development and debugging YARN mode for production environment | -
    SUBMARINE_HADOOP_CONF_DIRSet the HADOOP-CONF path to support multiple Hadoop cluster environments
    SUBMARINE_HADOOP_HOMEHadoop-3.1+ above path installed on the Zeppelin server
    SUBMARINE_HADOOP_KEYTABKeytab file path for a hadoop cluster with kerberos authentication turned on
    SUBMARINE_HADOOP_PRINCIPALPRINCIPAL information for the keytab file of the hadoop cluster with kerberos authentication turned on
    SUBMARINE_INTERPRETER_DOCKER_IMAGEAt INTERPRETER_LAUNCH_MODE=yarn, Submarine uses this image to create a Zeppelin Submarine interpreter container to create an algorithm development environment for the user. | -
    docker.container.networkYARN's Docker network name
    machinelearing.distributed.enableWhether to use the model training of the distributed mode JOB RUN submission
    shell.command.timeout.millisecs60000Execute timeout settings for shell commands in the Submarine interpreter container
    submarine.algorithm.hdfs.pathSave machine-based algorithms developed using Submarine interpreter to HDFS as files
    submarine.yarn.queueroot.defaultSubmarine submits model training YARN queue name
    tf.checkpoint.pathTensorflow checkpoint path, Each user will create a user's checkpoint secondary path using the username under this path. Each algorithm submitted by the user will create a checkpoint three-level path using the note id (the user's Tensorboard uses the checkpoint data in this path for visual display)
    tf.parameter.services.cpuNumber of CPU cores applied to Tensorflow parameter services when Submarine submits model distributed training
    tf.parameter.services.docker.imageSubmarine creates a mirror for Tensorflow parameter services when submitting model distributed training
    tf.parameter.services.gpuGPU cores applied to Tensorflow parameter services when Submarine submits model distributed training
    tf.parameter.services.memory2GMemory resources requested by Tensorflow parameter services when Submarine submits model distributed training
    tf.parameter.services.numNumber of Tensorflow parameter services used by Submarine to submit model distributed training
    tf.tensorboard.enabletrueCreate a separate Tensorboard for each user
    tf.worker.services.cpuSubmarine submits model resources for Tensorflow worker services when submitting model training
    tf.worker.services.docker.imageSubmarine creates a mirror for Tensorflow worker services when submitting model distributed training
    tf.worker.services.gpuSubmarine submits GPU resources for Tensorflow worker services when submitting model training
    tf.worker.services.memorySubmarine submits model resources for Tensorflow worker services when submitting model training
    tf.worker.services.numNumber of Tensorflow worker services used by Submarine to submit model distributed training
    yarn.webapp.http.addresshttp://hadoop:8088YARN web ui address
    zeppelin.interpreter.rpc.portRange29914You need to export this port in the SUBMARINE_INTERPRETER_DOCKER_IMAGE configuration image. RPC communication for Zeppelin Server and Submarine interpreter containers
    zeppelin.ipython.grpc.message_size33554432Message size setting for IPython grpc in Submarine interpreter container
    zeppelin.ipython.launch.timeout30000IPython execution timeout setting in Submarine interpreter container
    zeppelin.pythonpythonExecution path of python in Submarine interpreter container
    zeppelin.python.maxResult10000The maximum number of python execution results returned from the Submarine interpreter container
    zeppelin.python.useIPythonfalseIPython is currently not supported and must be false
    zeppelin.submarine.auth.typesimple/kerberosHas Hadoop turned on kerberos authentication?
    - -### Docker images - -The docker images file is stored in the `zeppelin/scripts/docker/submarine` directory. - -1. submarine interpreter cpu version - -2. submarine interpreter gpu version - -3. tensorflow 1.10 & hadoop 3.1.2 cpu version - -4. tensorflow 1.10 & hadoop 3.1.2 gpu version - - -## Change Log - -**0.1.0** _(Zeppelin 0.9.0)_ : - -* Support distributed or standolone tensorflow model training. -* Support submarine interpreter running local. -* Support submarine interpreter running YARN. -* Support Docker on YARN-3.3.0, Plan compatible with lower versions of yarn. - -## Bugs & Contacts - -+ **Submarine interpreter BUG** - If you encounter a bug for this interpreter, please create a sub **JIRA** ticket on [ZEPPELIN-3856](https://issues.apache.org/jira/browse/ZEPPELIN-3856). -+ **Submarine Running problem** - If you encounter a problem for Submarine runtime, please create a **ISSUE** on [hadoop-submarine-ecosystem](https://github.com/hadoopsubmarine/hadoop-submarine-ecosystem). -+ **YARN Submarine BUG** - If you encounter a bug for Yarn Submarine, please create a **JIRA** ticket on [SUBMARINE](https://issues.apache.org/jira/browse/SUBMARINE). - -## Dependency - -1. **YARN** - Submarine currently need to run on Hadoop 3.3+ - - + The hadoop version of the hadoop submarine team git repository is periodically submitted to the code repository of the hadoop. - + The version of the git repository for the hadoop submarine team will be faster than the hadoop version release cycle. - + You can use the hadoop version of the hadoop submarine team git repository. - -2. **Submarine runtime environment** - you can use Submarine-installer https://github.com/hadoopsubmarine, Deploy Docker and network environments. - -## More - -**Hadoop Submarine Project**: https://hadoop.apache.org/submarine -**Youtube Submarine Channel**: https://www.youtube.com/channel/UC4JBt8Y8VJ0BW0IM9YpdCyQ \ No newline at end of file diff --git a/docs/quickstart/docker.md b/docs/quickstart/docker.md index 0c6a478ff12..17e6229d7bd 100644 --- a/docs/quickstart/docker.md +++ b/docs/quickstart/docker.md @@ -19,9 +19,9 @@ limitations under the License. --> {% include JB/setup %} -# Zeppelin interpreter on Docker +# Zeppelin Interpreter on Docker -Zeppelin service runs on local server. zeppelin is able to run the interpreter in the docker container, Isolating the operating environment of the interpreter through the docker container. Zeppelin can be easily used without having to install python, spark, etc. on the local node. +Zeppelin service runs on local server. Zeppelin is able to run the interpreter in the docker container, Isolating the operating environment of the interpreter through the docker container. Zeppelin can be easily used without having to install python, spark, etc. on the local node. Key benefits are @@ -55,6 +55,15 @@ vi `/etc/docker/daemon.json`, Add `tcp://0.0.0.0:2375` to the `hosts` configurat `hosts` property reference: https://docs.docker.com/engine/reference/commandline/dockerd/ +#### Security warning + +Making the Docker daemon available over TCP is potentially dangerous: as you +can read [here](https://docs.docker.com/engine/security/#docker-daemon-attack-surface), +the docker daemon typically has broad privileges, so only trusted users should +have access to it. If you expose the daemon over TCP, you must use firewalling +to make sure only trusted users can access the port. 
This also includes making +sure the interpreter docker containers that are started by Zeppelin do not have +access to this port. ## Quickstart @@ -79,7 +88,7 @@ vi `/etc/docker/daemon.json`, Add `tcp://0.0.0.0:2375` to the `hosts` configurat Set to the same time zone as the zeppelin server, keeping the time zone in the interpreter docker container the same as the server. E.g, `"America/New_York"` or `"Asia/Shanghai"` ```bash - export DOCKER_TIME_ZONE="America/New_York" + export ZEPPELIN_DOCKER_TIME_ZONE="America/New_York" ``` @@ -140,7 +149,6 @@ Zeppelin service runs on local server, it auto configure itself to use `DockerIn - Keytab file configured in the interpreter properties - zeppelin.shell.keytab.location - spark.yarn.keytab - - submarine.hadoop.keytab - zeppelin.jdbc.keytab.location - zeppelin.server.kerberos.keytab diff --git a/docs/quickstart/flink_with_zeppelin.md b/docs/quickstart/flink_with_zeppelin.md new file mode 100644 index 00000000000..70f7970b997 --- /dev/null +++ b/docs/quickstart/flink_with_zeppelin.md @@ -0,0 +1,42 @@ +--- +layout: page +title: "Flink with Zeppelin" +description: "" +group: quickstart +--- + +{% include JB/setup %} + +# Flink support in Zeppelin + +
    + +
+
+For a brief overview of Apache Flink fundamentals with Apache Zeppelin, see the following guide:
+
+- **built-in** Apache Flink integration.
+- With [Flink Scala Shell](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/repls/scala_shell/), [PyFlink Shell](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/deployment/repls/python_shell/), [Flink SQL](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/dev/table/sql/overview/)
+- Inject ExecutionEnvironment, StreamExecutionEnvironment, BatchTableEnvironment, StreamTableEnvironment (see the sketch after this list).
+- Canceling job and displaying its progress
+- Supports different modes: local, remote, yarn, yarn-application
+- Dependency management
+- Streaming Visualization
+
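To make the injected environments in the list above concrete, here is a minimal sketch of a Flink paragraph (illustrative only; `benv` and `senv` are the environments injected by the Flink interpreter):

```
%flink
// `benv` (ExecutionEnvironment) and `senv` (StreamExecutionEnvironment)
// are injected by the Flink interpreter.
val data = benv.fromElements(1, 2, 3, 4, 5)
println(data.count())
```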
    + +For the further information about Flink support in Zeppelin, please check + +- [Flink Interpreter](../interpreter/flink.html) diff --git a/docs/quickstart/install.md b/docs/quickstart/install.md index aa14d9ffa99..c4e57692aa8 100644 --- a/docs/quickstart/install.md +++ b/docs/quickstart/install.md @@ -35,8 +35,8 @@ Apache Zeppelin officially supports and is tested on the following environments: Value - OpenJDK or Oracle JDK - 1.8 (151+)
    (set JAVA_HOME) + Java + JDK 11
(set JAVA_HOME) OS
@@ -50,7 +50,7 @@ Two binary packages are available on the [download page](http://zeppelin.apache.
- **all interpreter package**: unpack it in a directory of your choice and you're ready to go.
- **net-install interpreter package**: only spark, python, markdown and shell interpreter included. Unpack and follow [install additional interpreters](../usage/interpreter/installation.html) to install other interpreters. If you're unsure, just run `./bin/install-interpreter.sh --all` and install all interpreters.
-
+
### Building Zeppelin from source
Follow the instructions [How to Build](../setup/basics/how_to_build.html), If you want to build from source instead of using binary package.
@@ -67,9 +67,11 @@ bin/zeppelin-daemon.sh start
After Zeppelin has started successfully, go to [http://localhost:8080](http://localhost:8080) with your web browser.
-By default Zeppelin is listening at `127.0.0.1:8080`, so you can't access it when it is deployed in another remote machine.
+By default Zeppelin is listening at `127.0.0.1:8080`, so you can't access it when it is deployed on another remote machine.
To access a remote Zeppelin, you need to change `zeppelin.server.addr` to `0.0.0.0` in `conf/zeppelin-site.xml`.
+Check the log file at `ZEPPELIN_HOME/logs/zeppelin-server-*.log` if you cannot open Zeppelin.
+
#### Stopping Zeppelin
```
@@ -84,15 +86,27 @@ Make sure that [docker](https://www.docker.com/community-edition) is installed i
Use this command to launch Apache Zeppelin in a container.
```bash
-docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.9.0
+docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:0.10.0
```
+
To persist `logs` and `notebook` directories, use the [volume](https://docs.docker.com/engine/reference/commandline/run/#mount-volume--v-read-only) option for docker container.
```bash
-docker run -p 8080:8080 --rm -v $PWD/logs:/logs -v $PWD/notebook:/notebook \
+docker run -u $(id -u) -p 8080:8080 --rm -v $PWD/logs:/logs -v $PWD/notebook:/notebook \
-e ZEPPELIN_LOG_DIR='/logs' -e ZEPPELIN_NOTEBOOK_DIR='/notebook' \
- --name zeppelin apache/zeppelin:0.9.0
+ --name zeppelin apache/zeppelin:0.10.0
+```
+
+`-u $(id -u)` is to make sure you have the permission to write logs and notebooks.
+
+Many interpreters require additional dependencies, e.g. the Spark interpreter requires a Spark binary distribution
+and the Flink interpreter requires a Flink binary distribution. You can also mount them via docker volumes. e.g.
+
+```bash
+docker run -u $(id -u) -p 8080:8080 --rm -v /mnt/disk1/notebook:/notebook \
+-v /usr/lib/spark-current:/opt/spark -v /mnt/disk1/flink-1.12.2:/opt/flink -e FLINK_HOME=/opt/flink \
+-e SPARK_HOME=/opt/spark -e ZEPPELIN_NOTEBOOK_DIR='/notebook' --name zeppelin apache/zeppelin:0.10.0
```
If you have trouble accessing `localhost:8080` in the browser, Please clear browser cache.
@@ -146,13 +160,15 @@ Congratulations, you have successfully installed Apache Zeppelin! Here are a few
#### New to Apache Zeppelin...
* For an in-depth overview, head to [Explore Zeppelin UI](../quickstart/explore_ui.html).
- * And then, try run [Tutorial Notebook](http://localhost:8080/#/notebook/2A94M5J1Z) in your Zeppelin.
+ * And then, try running the Tutorial Notebooks shipped with your Zeppelin distribution.
* And see how to change [configurations](../setup/operation/configuration.html) like port number, etc.
-#### Spark, Python, SQL, and more +#### Spark, Flink, SQL, Python, R and more * [Spark support in Zeppelin](./spark_with_zeppelin.html), to know more about deep integration with [Apache Spark](http://spark.apache.org/). + * [Flink support in Zeppelin](./flink_with_zeppelin.html), to know more about deep integration with [Apache Flink](http://flink.apache.org/). * [SQL support in Zeppelin](./sql_with_zeppelin.html) for SQL support * [Python support in Zeppelin](./python_with_zeppelin.html), for Matplotlib, Pandas, Conda/Docker integration. + * [R support in Zeppelin](./r_with_zeppelin.html) * [All Available Interpreters](../#available-interpreters) #### Multi-user support ... diff --git a/docs/quickstart/kubernetes.md b/docs/quickstart/kubernetes.md index 1c0b99af72e..470614f2f04 100644 --- a/docs/quickstart/kubernetes.md +++ b/docs/quickstart/kubernetes.md @@ -34,10 +34,10 @@ Key benefits are - Zeppelin >= 0.9.0 docker image - Spark >= 2.4.0 docker image (in case of using Spark Interpreter) - - A running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) + - A running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) - [Kubernetes DNS](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/) configured in your cluster - Enough cpu and memory in your Kubernetes cluster. We recommend 4CPUs, 6g of memory to be able to start Spark Interpreter with few executors. - + - If you're using [minikube](https://kubernetes.io/docs/setup/minikube/), check your cluster capacity (`kubectl describe node`) and increase if necessary ``` @@ -46,38 +46,77 @@ Key benefits are $ minikube config set memory $ minikube start $ minikube config view - ``` + ``` ## Quickstart -Get `zeppelin-server.yaml` from github repository or find it from Zeppelin distribution package. +Let's first clone the Zeppelin repository from GitHub: + +```sh +git clone https://github.com/apache/zeppelin.git +cd zeppelin +# you can check out to your desired version/branch +# git checkout tags/v0.10.1 +# just make sure you check the version inside "./pom.xml" +``` + +Now we are going to create the `zeppelin-distribution` image. This may take some time and this image will be used as a base for the upcoming required images: +```sh +docker build -t zeppelin-distribution:latest -f ./Dockerfile . ``` -# Get it from Zeppelin distribution package. -$ ls /k8s/zeppelin-server.yaml -# or download it from github -$ curl -s -O https://raw.githubusercontent.com/apache/zeppelin/master/k8s/zeppelin-server.yaml +Next, we will build our `zeppelin-server` image: + +```sh +cd scripts/docker/zeppelin-server +# Looking at the "./pom.xml" we can see the version is 0.12.0-SNAPSHOT +# Let's set the correct version in our Dockerfile: +# vi Dockerfile +# ARG version="0.12.0-SNAPSHOT" +# Once you saved the Dockerfile with the correct version we can build our image: +docker build -t zeppelin-server:0.12.0-SNAPSHOT -f ./Dockerfile . ``` -Start zeppelin on kubernetes cluster, +The last image we build is `zeppelin-interpreter`: + +```sh +cd scripts/docker/zeppelin-interpreter +docker build -t zeppelin-interpreter:0.12.0-SNAPSHOT -f ./Dockerfile . 
+``` + +So we should now have the following images: + +```sh +# sudo if you are on Linux and Docker requires root +$ docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +zeppelin-interpreter 0.12.0-SNAPSHOT 4f77fe989eed 3 minutes ago 622MB +zeppelin-server 0.12.0-SNAPSHOT 4f77fe989eed 3 minutes ago 622MB +zeppelin-distribution latest bd2fb4b321d2 40 minutes ago 1.27GB ``` + +Reminder: Please adjust the images in the YAML-File of `zeppelin-server.yaml` + +Start zeppelin on Kubernetes cluster, + +```sh kubectl apply -f zeppelin-server.yaml ``` Port forward Zeppelin server port, - -``` + +```sh kubectl port-forward zeppelin-server 8080:80 ``` and browse [localhost:8080](http://localhost:8080). -Try run some paragraphs and see each interpreter is running as a Pod (using `kubectl get pods`), instead of a local process. +Try running some paragraphs and see if each interpreter is running as a Pod (using `kubectl get pods`), instead of a local process. -To shutdown, +To shut down, -``` +```sh kubectl delete -f zeppelin-server.yaml ``` @@ -104,7 +143,7 @@ Create note and configure executor number (default 1) ``` %spark.conf spark.executor.instances 5 -``` +``` And then start your spark interpreter @@ -114,7 +153,7 @@ sc.parallelize(1 to 100).count ... ``` While `spark.master` property of SparkInterpreter starts with `k8s://` (default `k8s://https://kubernetes.default.svc` when Zeppelin started using zeppelin-server.yaml), Spark executors will be automatically created in your Kubernetes cluster. -Spark UI is accessible by clicking `SPARK JOB` on the Paragraph. +Spark UI is accessible by clicking `SPARK JOB` on the Paragraph. Check [here](https://spark.apache.org/docs/latest/running-on-kubernetes.html) to know more about Running Spark on Kubernetes. @@ -124,13 +163,13 @@ Check [here](https://spark.apache.org/docs/latest/running-on-kubernetes.html) to To build your own Zeppelin image, first build Zeppelin project with `-Pbuild-distr` flag. ``` -$ mvn package -DskipTests -Pbuild-distr +$ ./mvnw package -DskipTests -Pbuild-distr ``` Binary package will be created under `zeppelin-distribution/target` directory. Move created package file under `scripts/docker/zeppelin/bin/` directory. ``` -$ mv zeppelin-distribution/target/zeppelin-*.tar.gz scripts/docker/zeppelin/bin/ +$ mv zeppelin-distribution/target/zeppelin-*-bin.tgz scripts/docker/zeppelin/bin/ ``` `scripts/docker/zeppelin/bin/Dockerfile` downloads package from internet. Modify the file to add package from filesystem. @@ -155,7 +194,7 @@ Then build docker image. ``` # configure docker env, if you're using minikube -$ eval $(minikube docker-env) +$ eval $(minikube docker-env) # change directory $ cd scripts/docker/zeppelin/bin/ @@ -245,9 +284,11 @@ to customize, 4. Run a paragraph will create an interpreter using modified yaml files. The interpreter pod can also be customized through the interpreter settings. Here are some of the properties: + | Property Name | Default Value | Description | | ----- | ----- | ----- | -| `zeppelin.k8s.namespace` | `default` | The Kubernetes namespace to use. | +| `zeppelin.k8s.interpreter.namespace` | `default` | Specify the namespace of the current interpreter. Users can set different namespaces for different interpreters. In order to minimize permissions, the interpreter pod can only be created in the `default` namespace by default. 
If users need to create an interpreter pod in other namespaces, they need to add the corresponding `rolebinding` in `k8s/zeppelin-server.yaml`.| +| `zeppelin.k8s.interpreter.serviceAccount` | `default` | The Kubernetes service account to use. | | `zeppelin.k8s.interpreter.container.image` | `apache/zeppelin:` | The interpreter image to use. | | `zeppelin.k8s.interpreter.cores` | (optional) | The number of cpu cores to use. | | `zeppelin.k8s.interpreter.memory` | (optional) | The memory to use, e.g., `1g`. | @@ -256,6 +297,9 @@ The interpreter pod can also be customized through the interpreter settings. Her | `zeppelin.k8s.interpreter.imagePullSecrets` | (optional) | Set the comma-separated list of Kubernetes secrets while pulling images, e.g., `mysecret1,mysecret2` | | `zeppelin.k8s.interpreter.container.imagePullPolicy` | (optional) | Set the pull policy of the interpreter image, e.g., `Always` | | `zeppelin.k8s.spark.container.imagePullPolicy` | (optional) | Set the pull policy of the spark image, e.g., `Always` | +| `zeppelin.spark.uiWebUrl` | `//{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}}` | The URL for user to access Spark UI. The default value is a [jinjava](https://github.com/HubSpot/jinjava) template that contains three variables. | +| `zeppelin.k8s.spark.useIngress` | (optional) | If true, the [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) will be created when creating the spark interpreter. So users can access the Spark UI through Ingress. | +| `zeppelin.k8s.spark.ingress.host` | `{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}}` | If `zeppelin.k8s.spark.useIngress` is `true`, it configures the `host` value of the Ingress. The default value is a [jinjava](https://github.com/HubSpot/jinjava) template that contains three variables. Users can access the Spark UI through a customized `zeppelin.k8s.spark.ingress.host`. | ## Future work diff --git a/docs/quickstart/python_with_zeppelin.md b/docs/quickstart/python_with_zeppelin.md index 80237f8c4a6..76b3d5883e3 100644 --- a/docs/quickstart/python_with_zeppelin.md +++ b/docs/quickstart/python_with_zeppelin.md @@ -27,16 +27,17 @@ limitations under the License. The following guides explain how to use Apache Zeppelin that enables you to write in Python: +- supports [vanilla python](../interpreter/python.html#vanilla-python-interpreter-python) and [ipython](../interpreter/python.html#ipython-interpreter-pythonipython-recommended) - supports flexible python environments using [conda](../interpreter/python.html#conda), [docker](../interpreter/python.html#docker) - can query using [PandasSQL](../interpreter/python.html#sql-over-pandas-dataframes) - also, provides [PySpark](../interpreter/spark.html) +- [run python interpreter in yarn cluster](../interpreter/python.html#run-python-in-yarn-cluster) with customized conda python environment. - with [matplotlib integration](../interpreter/python.html#matplotlib-integration) -- support [ipython](../interpreter/python.html#ipython-interpreter-pythonipython-recommended) - can create results including **UI widgets** using [Dynamic Form](../interpreter/python.html#using-zeppelin-dynamic-forms)
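As an illustration of the Dynamic Form support listed above, a minimal sketch (not part of the original page; the form name and default value are arbitrary):

```
%python
# z.input renders a text-box form in the paragraph and returns its value.
max_age = z.input("maxAge", "30")
print("maxAge = " + str(max_age))
```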
    -For the further information about Spark support in Zeppelin, please check +For the further information about Python support in Zeppelin, please check - [Python Interpreter](../interpreter/python.html) diff --git a/docs/quickstart/r_with_zeppelin.md b/docs/quickstart/r_with_zeppelin.md new file mode 100644 index 00000000000..f9b9feb6596 --- /dev/null +++ b/docs/quickstart/r_with_zeppelin.md @@ -0,0 +1,42 @@ +--- +layout: page +title: "R with Zeppelin" +description: "" +group: quickstart +--- + +{% include JB/setup %} + +# R support in Zeppelin + +
    + +
+
+The following guides explain how to use Apache Zeppelin that enables you to write in R:
+
+- Supports [vanilla R](../interpreter/r.html#how-to-use-r-interpreter) and [IRkernel](../interpreter/r.html#how-to-use-r-interpreter)
+- Visualize R dataframe via [ZeppelinContext](../interpreter/r.html#zshow) (see the sketch after this list)
+- [Run R interpreter in yarn cluster](../interpreter/r.html#run-r-in-yarn-cluster) with customized conda R environment.
+- [Make R Shiny App](../interpreter/r.html#make-shiny-app-in-zeppelin)
+
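As a sketch of the `z.show` visualization mentioned in the list above (illustrative only; it assumes the R interpreter is bound as `%r`, and `iris` is a dataset that ships with R):

```
%r
# z.show renders an R data.frame as an interactive Zeppelin table.
z.show(head(iris, 20))
```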
+
+For the further information about R support in Zeppelin, please check
+
+- [R Interpreter](../interpreter/r.html)
+
+
+
diff --git a/docs/quickstart/spark_with_zeppelin.md b/docs/quickstart/spark_with_zeppelin.md
index 6b35beb2af2..7afa608e741 100644
--- a/docs/quickstart/spark_with_zeppelin.md
+++ b/docs/quickstart/spark_with_zeppelin.md
@@ -28,12 +28,13 @@ limitations under the License.
For a brief overview of Apache Spark fundamentals with Apache Zeppelin, see the following guide:
- **built-in** Apache Spark integration.
-- with [SparkSQL](http://spark.apache.org/sql/), [PySpark](https://spark.apache.org/docs/latest/api/python/pyspark.html), [SparkR](https://spark.apache.org/docs/latest/sparkr.html)
-- inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html), [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [SparkSession](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
-- canceling job and displaying its progress
-- supporting [Spark Cluster Mode](../setup/deployment/spark_cluster_mode.html#apache-zeppelin-on-spark-cluster-mode) for external spark clusters
-- supports [different context per user / note](../usage/interpreter/interpreter_binding_mode.html)
-- sharing variables among PySpark, SparkR and Spark through [ZeppelinContext](../interpreter/spark.html#zeppelincontext)
+- With [Spark Scala](https://spark.apache.org/docs/latest/quick-start.html), [SparkSQL](http://spark.apache.org/sql/), [PySpark](https://spark.apache.org/docs/latest/api/python/), [SparkR](https://spark.apache.org/docs/latest/sparkr.html)
+- Inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html), [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [SparkSession](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
+- Canceling job and displaying its progress
+- Supports different modes: local, standalone, yarn(client & cluster), k8s
+- Dependency management
+- Supports [different context per user / note](../usage/interpreter/interpreter_binding_mode.html)
+- Sharing variables among PySpark, SparkR and Spark through [ZeppelinContext](../interpreter/spark.html#zeppelincontext) (see the sketch after this list)
- [Livy Interpreter](../interpreter/livy.html)
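To make the variable-sharing bullet above concrete, here is a sketch of passing a value from a Scala paragraph to a PySpark paragraph via `ZeppelinContext` (illustrative only; the key name is arbitrary):

```
%spark
// Put a value into ZeppelinContext under the key "threshold".
z.put("threshold", 10)
```

```
%spark.pyspark
# Read the value back in a PySpark paragraph of the same note.
threshold = z.get("threshold")
print(threshold)
```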
    diff --git a/docs/quickstart/sql_with_zeppelin.md b/docs/quickstart/sql_with_zeppelin.md index df63ccd3813..d82cd61abf5 100644 --- a/docs/quickstart/sql_with_zeppelin.md +++ b/docs/quickstart/sql_with_zeppelin.md @@ -33,16 +33,21 @@ The following guides explain how to use Apache Zeppelin that enables you to writ * [MariaDB](../interpreter/jdbc.html#mariadb) * [AWS Redshift](../interpreter/jdbc.html#redshift) * [Apache Hive](../interpreter/jdbc.html#apache-hive) + * [Presto/Trino](../interpreter/jdbc.html#prestotrino) + * [Impala](../interpreter/jdbc.html#impala) + * [Apache Kyuubi](../interpreter/jdbc.html#apache-kyuubi) * [Apache Phoenix](../interpreter/jdbc.html#apache-phoenix) * [Apache Drill](../interpreter/jdbc.html#apache-drill) * [Apache Tajo](../interpreter/jdbc.html#apache-tajo) * and so on - [Spark Interpreter](../interpreter/spark.html) supports [SparkSQL](http://spark.apache.org/sql/) +- [Flink Interpreter](../interpreter/flink.html) supports [Flink SQL](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/dev/table/sql/overview/) - [Python Interpreter](../interpreter/python.html) supports [pandasSQL](../interpreter/python.html#sql-over-pandas-dataframes) - can create query result including **UI widgets** using [Dynamic Form](../usage/dynamic_form/intro.html) ```sql - %sql + %sql + select age, count(1) value from bank where age < ${maxAge=30} @@ -56,9 +61,8 @@ For the further information about SQL support in Zeppelin, please check - [JDBC Interpreter](../interpreter/jdbc.html) - [Spark Interpreter](../interpreter/spark.html) +- [Flink Interpreter](../interpreter/flink.html) - [Python Interpreter](../interpreter/python.html) -- [IgniteSQL Interpreter](../interpreter/ignite.html#ignite-sql-interpreter) for [Apache Ignite](https://ignite.apache.org/) -- [Kylin Interpreter](../interpreter/kylin.html) for [Apache Kylin](http://kylin.apache.org/) diff --git a/docs/quickstart/yarn.md b/docs/quickstart/yarn.md index 60fb48e2fb2..19808a26b47 100644 --- a/docs/quickstart/yarn.md +++ b/docs/quickstart/yarn.md @@ -19,11 +19,11 @@ limitations under the License. --> {% include JB/setup %} -# Zeppelin on Yarn +# Zeppelin Interpreter on Yarn
    -Zeppelin on yarn means to run interpreter process in yarn container. The key benefit is the scalability, you won't run out of memory +Zeppelin is able to run interpreter process in yarn container. The key benefit is the scalability, you won't run out of memory of the zeppelin server host if you run large amount of interpreter processes. ## Prerequisites @@ -64,6 +64,11 @@ Besides that, you can also specify other properties as following table. default yarn queue name + + zeppelin.interpreter.yarn.node.label.expression + + yarn node label expression specified for interpreter process + ## Differences with non-yarn interpreter mode (local mode) diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md index 7f70c33b5c1..99951a9353a 100644 --- a/docs/setup/basics/how_to_build.md +++ b/docs/setup/basics/how_to_build.md @@ -61,7 +61,7 @@ git clone https://github.com/apache/zeppelin.git You can build Zeppelin with following maven command: ```bash -mvn clean package -DskipTests [Options] +./mvnw clean package -DskipTests [Options] ``` Check [build-profiles](#build-profiles) section for further build options. @@ -79,23 +79,11 @@ You can directly start Zeppelin by running the following command after successfu ### Build profiles - -#### Scala profile - -To be noticed, this scala profile affect the modules (e.g. cassandra, scalding) that use scala except Spark interpreter (Spark interpreter use other profiles to control its scala version, see the doc below). - -Set scala version (default 2.10). Available profiles are - -``` --Pscala-2.10 --Pscala-2.11 -``` - #### Spark Interpreter -To be noticed, the spark profiles here only affect the embedded mode (no need to specify `SPARK_HOME`) of spark interpreter. +To be noticed, the spark profiles here only affect the unit test (no need to specify `SPARK_HOME`) of spark interpreter. Zeppelin doesn't require you to build with different spark to make different versions of spark work in Zeppelin. -You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 1.6 to 3.0. +You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 3.3 to 3.5. To build with a specific Spark version or scala versions, define one or more of the following profiles and options: @@ -106,43 +94,34 @@ Set spark major version Available profiles are ``` --Pspark-3.0 --Pspark-2.4 --Pspark-2.3 --Pspark-2.2 --Pspark-2.1 --Pspark-2.0 --Pspark-1.6 +-Pspark-3.5 +-Pspark-3.4 +-Pspark-3.3 ``` minor version can be adjusted by `-Dspark.version=x.x.x` ##### `-Pspark-scala-[version] (optional)` -To be noticed, these profiles also only affect the embedded mode (no need to specify `SPARK_HOME`) of Spark interpreter. -Actually Zeppelin supports all the versions of scala (2.10, 2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`. +To be noticed, these profiles also only affect the unit test (no need to specify `SPARK_HOME`) of Spark interpreter. +Actually Zeppelin supports all the versions of scala (2.12, 2.13) in Spark interpreter as long as you specify `SPARK_HOME`. Available profiles are ``` --Pspark-scala-2.10 --Pspark-scala-2.11 -Pspark-scala-2.12 +-Pspark-scala-2.13 ``` - -If you want to use Spark 3.x in the embedded mode, then you have to specify both profile `spark-3.0` and `spark-scala-2.12`, -because Spark 3.x doesn't support scala 2.10 and 2.11. 
#### Build hadoop with Zeppelin (`-Phadoop[version]`) To be noticed, hadoop profiles only affect Zeppelin server, it doesn't affect any interpreter. Zeppelin server use hadoop in some cases, such as using hdfs as notebook storage. You can check this [page](./hadoop_integration.html) for more details about how to configure hadoop in Zeppelin. -Set hadoop major version (default hadoop2). +Set hadoop major version (default hadoop3). Available profiles are ``` --Phadoop2 -Phadoop3 ``` @@ -163,29 +142,18 @@ Build examples under zeppelin-examples directory Here are some examples with several options: ```bash -# build with spark-3.0, spark-scala-2.12 -mvn clean package -Pspark-3.0 -Pspark-scala-2.12 -DskipTests - -# build with spark-2.4, spark-scala-2.11 -mvn clean package -Pspark-2.4 -Pspark-scala-2.11 -DskipTests +# build with spark-3.3, spark-scala-2.12 +./mvnw clean package -Pspark-3.3 -Pspark-scala-2.12 -DskipTests -# build with spark-1.6, spark-scala-2.10 -mvn clean package -Pspark-1.6 -Pspark-scala-2.10 -DskipTests +# build with spark-3.4, spark-scala-2.13 +./mvnw clean package -Pspark-3.4 -Pspark-scala-2.13 -DskipTests -# build with CDH -mvn clean package -Pspark-1.6 -Pspark-scala-2.10 -Dhadoop.version=2.6.0-cdh5.5.0 -Pvendor-repo -DskipTests ``` Ignite Interpreter ```bash -mvn clean package -Dignite.version=1.9.0 -DskipTests -``` - -Scalding Interpreter - -```bash -mvn clean package -Pscalding -DskipTests +./mvnw clean package -Dignite.version=1.9.0 -DskipTests ``` ### Optional configurations @@ -204,7 +172,7 @@ spark.bin.download.url # default http://d3kbcqa49mib13.cloudfront.net/${spark.ar Py4J package ```bash -python.py4j.version # default 0.9.2 +python.py4j.version # default 0.10.9.7 pypi.repo.url # default https://pypi.python.org/packages python.py4j.repo.folder # default /64/5c/01e13b68e8caafece40d549f232c9b5677ad1016071a48d04cc3895acaa3 ``` @@ -218,7 +186,7 @@ Frontend Maven Plugin configurations ``` plugin.frontend.nodeDownloadRoot # default https://nodejs.org/dist/ -plugin.frontend.npmDownloadRoot # default http://registry.npmjs.org/npm/-/ +plugin.frontend.npmDownloadRoot # default https://registry.npmjs.org/npm/-/ plugin.frontend.yarnDownloadRoot # default https://github.com/yarnpkg/yarn/releases/download/ ``` @@ -239,23 +207,11 @@ sudo apt-get install r-base-dev sudo apt-get install r-cran-evaluate ``` - - -### Install maven - -```bash -wget http://www.eu.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -sudo tar -zxf apache-maven-3.6.3-bin.tar.gz -C /usr/local/ -sudo ln -s /usr/local/apache-maven-3.6.3/bin/mvn /usr/local/bin/mvn -``` - _Notes:_ - Ensure node is installed by running `node --version` - - Ensure maven is running version 3.6.3 or higher with `mvn -version` + - Ensure maven is running version 3.6.3 or higher with `./mvnw -version` - Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxMetaspaceSize=512m"` - - ## Proxy setting (optional) If you're behind the proxy, you'll need to configure maven and npm to pass through it. @@ -325,16 +281,16 @@ _Notes:_ To package the final distribution including the compressed archive, run: ```sh -mvn clean package -Pbuild-distr +./mvnw clean package -Pbuild-distr ``` To build a distribution with specific profiles, run: ```sh -mvn clean package -Pbuild-distr -Pspark-2.4 +./mvnw clean package -Pbuild-distr -Pspark-3.4 ``` -The profiles `-Pspark-2.4` can be adjusted if you wish to build to a specific spark versions. 
+The profiles `-Pspark-3.4` can be adjusted if you wish to build to a specific spark versions. The archive is generated under _`zeppelin-distribution/target`_ directory diff --git a/docs/setup/deployment/cdh.md b/docs/setup/deployment/cdh.md index 20f819b4ee5..485cd34935d 100644 --- a/docs/setup/deployment/cdh.md +++ b/docs/setup/deployment/cdh.md @@ -25,7 +25,7 @@ limitations under the License. ### 1. Import Cloudera QuickStart Docker image ->[Cloudera](http://www.cloudera.com/) has officially provided CDH Docker Hub in their own container. Please check [this guide page](http://www.cloudera.com/documentation/enterprise/latest/topics/quickstart_docker_container.html#cloudera_docker_container) for more information. +>[Cloudera](http://www.cloudera.com/) has officially provided CDH Docker Hub in their own container. Please check [this guide page](https://hub.docker.com/r/cloudera/quickstart/) for more information. You can import the Docker image by pulling it from Cloudera Docker Hub. diff --git a/docs/setup/deployment/flink_and_spark_cluster.md b/docs/setup/deployment/flink_and_spark_cluster.md index c7936511721..df5df80d9ad 100644 --- a/docs/setup/deployment/flink_and_spark_cluster.md +++ b/docs/setup/deployment/flink_and_spark_cluster.md @@ -20,6 +20,8 @@ limitations under the License. {% include JB/setup %} +This document is outdated, it is not verified in the latest Zeppelin. + # Install with Flink and Spark cluster
    @@ -40,8 +42,8 @@ Assuming the minimal install, there are several programs that we will need to in - git - openssh-server -- OpenJDK 7 -- Maven 3.1+ +- OpenJDK 11 +- Maven For git, openssh-server, and OpenJDK 7 we will be using the apt package manager. @@ -58,45 +60,10 @@ sudo apt-get install git sudo apt-get install openssh-server ``` -##### OpenJDK 7 - -```bash -sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib -``` -*A note for those using Ubuntu 16.04*: To install `openjdk-7` on Ubuntu 16.04, one must add a repository. [Source](http://askubuntu.com/questions/761127/ubuntu-16-04-and-openjdk-7) - -```bash -sudo add-apt-repository ppa:openjdk-r/ppa -sudo apt-get update -sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib -``` - -##### Maven 3.1+ -Zeppelin requires maven version 3.x. The version available in the repositories at the time of writing is 2.x, so maven must be installed manually. - -Purge any existing versions of maven. - -```bash -sudo apt-get purge maven maven2 -``` - -Download the maven 3.3.9 binary. - -```bash -wget "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz" -``` - -Unarchive the binary and move to the `/usr/local` directory. - -```bash -tar -zxvf apache-maven-3.3.9-bin.tar.gz -sudo mv ./apache-maven-3.3.9 /usr/local -``` - -Create symbolic links in `/usr/bin`. +##### OpenJDK 11 ```bash -sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/bin/mvn +sudo apt-get install openjdk-11-jdk ``` ### Installing Zeppelin @@ -118,26 +85,23 @@ cd zeppelin Package Zeppelin. ```bash -mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.3 -Pscala-2.10 +./mvnw clean package -DskipTests -Pspark-3.5 -Pflink-1.17 ``` `-DskipTests` skips build tests- you're not developing (yet), so you don't need to do tests, the clone version *should* build. -`-Pspark-1.6` tells maven to build a Zeppelin with Spark 1.6. This is important because Zeppelin has its own Spark interpreter and the versions must be the same. +`-Pspark-3.5` tells maven to build a Zeppelin with Spark 3.5. This is important because Zeppelin has its own Spark interpreter and the versions must be the same. -`-Dflink.version=1.1.3` tells maven specifically to build Zeppelin with Flink version 1.1.3. +`-Pflink-1.17` tells maven to build a Zeppelin with Flink 1.17. --`-Pscala-2.10` tells maven to build with Scala v2.10. - - -**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 1.6 was the most recent Spark version available. +**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 3.5 was the most recent Spark version available. **Note:** On build failures. Having installed Zeppelin close to 30 times now, I will tell you that sometimes the build fails for seemingly no reason. As long as you didn't edit any code, it is unlikely the build is failing because of something you did. What does tend to happen, is some dependency that maven is trying to download is unreachable. If your build fails on this step here are some tips: - Don't get discouraged. - Scroll up and read through the logs. There will be clues there. 
-- Retry (that is, run the `mvn clean package -DskipTests -Pspark-1.6` again) +- Retry (that is, run the `./mvnw clean package -DskipTests -Pspark-3.5` again) - If there were clues that a dependency couldn't be downloaded wait a few hours or even days and retry again. Open source software when compiling is trying to download all of the dependencies it needs, if a server is off-line there is nothing you can do but wait for it to come back. - Make sure you followed all of the steps carefully. - Ask the community to help you. Go [here](http://zeppelin.apache.org/community.html) and join the user mailing list. People are there to help you. Make sure to copy and paste the build output (everything that happened in the console) and include that in your message. @@ -251,16 +215,16 @@ Building from source is recommended where possible, for simplicity in this tuto To download the Flink Binary use `wget` ```bash -wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.1.3/flink-1.1.3-bin-hadoop24-scala_2.10.tgz" -tar -xzvf flink-1.1.3-bin-hadoop24-scala_2.10.tgz +wget "https://archive.apache.org/dist/flink/flink-1.17.1/flink-1.17.1-bin-scala_2.12.tgz" +tar -xzvf flink-1.17.1-bin-scala_2.12.tgz ``` -This will download Flink 1.1.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version. +This will download Flink 1.17.1. Start the Flink Cluster. ```bash -flink-1.1.3/bin/start-cluster.sh +flink-1.17.1/bin/start-cluster.sh ``` ###### Building From source @@ -269,13 +233,13 @@ If you wish to build Flink from source, the following will be instructive. Note See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions. -Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.1.3-rc2, and build. +Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.17.1, and build. ```bash cd $HOME git clone https://github.com/apache/flink.git cd flink -git checkout release-1.1.3-rc2 +git checkout release-1.17.1 mvn clean install -DskipTests ``` @@ -297,8 +261,8 @@ If no task managers are present, restart the Flink cluster with the following co (if binaries) ```bash -flink-1.1.3/bin/stop-cluster.sh -flink-1.1.3/bin/start-cluster.sh +flink-1.17.1/bin/stop-cluster.sh +flink-1.17.1/bin/start-cluster.sh ``` @@ -310,7 +274,7 @@ build-target/bin/start-cluster.sh ``` -##### Spark 1.6 Cluster +##### Spark Cluster ###### Download Binaries @@ -321,12 +285,12 @@ Using binaries is also To download the Spark Binary use `wget` ```bash -wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz" -tar -xzvf spark-1.6.3-bin-hadoop2.6.tgz -mv spark-1.6.3-bin-hadoop2.6 spark +wget "https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz" +tar -xzvf spark-3.5.2-bin-hadoop3.tgz +mv spark-3.5.2-bin-hadoop3 spark ``` -This will download Spark 1.6.3, compatible with Hadoop 2.6. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.6` to your appropriate version. +This will download Spark 3.5.2, compatible with Hadoop 3. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `3` to your appropriate version. 
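As a minimal sanity check of the downloaded binaries (a sketch only, assuming the `$HOME` download location used in this tutorial and Flink's default REST port of 8081):

```bash
# Spark: the unpacked binary should report version 3.5.2.
spark/bin/spark-submit --version

# Flink: with the cluster started as above, the REST API returns an overview
# that includes the number of registered task managers.
curl -s http://localhost:8081/overview
```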
###### Building From source @@ -334,21 +298,18 @@ Spark is an extraordinarily large project, which takes considerable time to down See the [Spark Installation](https://github.com/apache/spark/blob/master/README.md) guide for more detailed instructions. -Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-1.6, and build. -**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at - the time of writing. You are free to check out other version, just make sure you build Zeppelin against the correct version of Spark. However if you use Spark 2.0, the word count example will need to be changed as Spark 2.0 is not compatible with the following examples. - +Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-3.5, and build. ```bash cd $HOME ``` -Clone, check out, and build Spark version 1.6.x. +Clone, check out, and build Spark version 3.5.x. ```bash git clone https://github.com/apache/spark.git cd spark -git checkout branch-1.6 +git checkout branch-3.5 mvn clean package -DskipTests ``` diff --git a/docs/setup/deployment/virtual_machine.md b/docs/setup/deployment/virtual_machine.md index a50d1a2ba52..0578b9caa7f 100644 --- a/docs/setup/deployment/virtual_machine.md +++ b/docs/setup/deployment/virtual_machine.md @@ -33,14 +33,14 @@ For SparkR users, this script includes several helpful [R Libraries](#r-extras). ### Prerequisites -This script requires three applications, [Ansible](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip "Ansible"), [Vagrant](http://www.vagrantup.com "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems. +This script requires three applications, [Ansible](https://www.ansible.com/ "Ansible"), [Vagrant](http://www.vagrantup.com "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems. ## Create a Zeppelin Ready VM If you are running Windows and don't yet have python installed, [install Python 2.7.x](https://www.python.org/downloads/release/python-2710/) first. 1. Download and Install Vagrant: [Vagrant Downloads](http://www.vagrantup.com/downloads.html) -2. Install Ansible: [Ansible Python pip install](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip) +2. 
Install Ansible: [Ansible Python pip install](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#pip-install) ```bash sudo easy_install pip @@ -86,7 +86,6 @@ By default, Vagrant will share your project directory (the directory with the Va Running the following commands in the guest machine should display these expected versions: * `node --version` should report *v0.12.7* -* `mvn --version` should report *Apache Maven 3.3.9* and *Java version: 1.7.0_85* The virtual machine consists of: @@ -108,7 +107,7 @@ This assumes you've already cloned the project either on the host machine in the ```bash cd /zeppelin -mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests +./mvnw clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests ./bin/zeppelin-daemon.sh start ``` diff --git a/docs/setup/deployment/yarn_install.md b/docs/setup/deployment/yarn_install.md index b130272a0c1..994180126e3 100644 --- a/docs/setup/deployment/yarn_install.md +++ b/docs/setup/deployment/yarn_install.md @@ -118,7 +118,7 @@ bin/zeppelin-daemon.sh stop ``` ## Interpreter -Zeppelin provides various distributed processing frameworks to process data that ranges from Spark, JDBC, Ignite and Lens to name a few. This document describes to configure JDBC & Spark interpreters. +Zeppelin provides various distributed processing frameworks to process data, such as Spark and JDBC. This document describes how to configure the JDBC & Spark interpreters. ### Hive Zeppelin supports Hive through JDBC interpreter. You might need the information to use Hive and can find in your hive-site.xml diff --git a/docs/setup/operation/configuration.md b/docs/setup/operation/configuration.md index 32d501537cc..e0c769202d0 100644 --- a/docs/setup/operation/configuration.md +++ b/docs/setup/operation/configuration.md @@ -53,7 +53,7 @@ Sources descending by priority: 8080 Zeppelin server port
    Note: Please make sure you're not using the same port with - Zeppelin web application development port (default: 9000). + Zeppelin web application development port (default: 9000).
    ZEPPELIN_SSL_PORT
    @@ -328,7 +328,7 @@ Sources descending by priority:
    ZEPPELIN_INTERPRETER_DEP_MVNREPO
    zeppelin.interpreter.dep.mvnRepo
    - https://repo1.maven.org/maven2/ + https://repo1.maven.org/maven2/,https://repo2.maven.org/maven2/ Remote principal repository for interpreter's additional dependency loading @@ -340,8 +340,8 @@ Sources descending by priority:
    ZEPPELIN_INTERPRETER_CONNECT_TIMEOUT
    zeppelin.interpreter.connect.timeout
- 30000 - Output message from interpreter exceeding the limit will be truncated + 600s + Interpreter process connect timeout. The default time unit is msec.
    ZEPPELIN_DEP_LOCALREPO
    @@ -463,6 +463,18 @@ Sources descending by priority: comma-separated list of folder, where cron is allowed + +
    ZEPPELIN_NOTE_CACHE_THRESHOLD
    +
    zeppelin.note.cache.threshold
    + 50 + Threshold for the number of notes in the cache before an eviction occurs. + + +
    ZEPPELIN_NOTEBOOK_VERSIONED_MODE_ENABLE
    +
    zeppelin.notebook.versioned.mode.enable
+ true + Value to enable/disable version control support in Notes. + + diff --git a/docs/setup/operation/monitoring.md b/docs/setup/operation/monitoring.md index 538b115c366..a2fe4434e9e 100644 --- a/docs/setup/operation/monitoring.md +++ b/docs/setup/operation/monitoring.md @@ -27,6 +27,7 @@ Apache Zeppelin is using [Micrometer](https://micrometer.io/) - a vendor-neutral ### Prometheus Monitoring [Prometheus](https://prometheus.io/) is the leading monitoring solution for [Kubernetes](https://kubernetes.io/). The Prometheus endpoint can be activated with the configuration property `zeppelin.metric.enable.prometheus`. The metrics are accessible via the unauthenticated endpoint `/metrics`. +For [Grafana](https://grafana.com/), a good starting point for a dashboard can be found in our [Github Repository](https://github.com/apache/zeppelin/blob/grafana/examples/dashboard.json). ### JMX Monitoring diff --git a/docs/setup/operation/upgrading.md b/docs/setup/operation/upgrading.md index 4b78ee628e7..673fcac59c7 100644 --- a/docs/setup/operation/upgrading.md +++ b/docs/setup/operation/upgrading.md @@ -35,6 +35,9 @@ So, copying `notebook` and `conf` directory should be enough. ## Migration Guide +### Upgrading from Zeppelin 0.9, 0.10 to 0.11 + - From 0.11, the `Pegdown` parser type for markdown is deprecated ([ZEPPELIN-5529](https://issues.apache.org/jira/browse/ZEPPELIN-5529)); `Flexmark` is used instead. + ### Upgrading from Zeppelin 0.8 to 0.9 - From 0.9, we changed the notes file name structure ([ZEPPELIN-2619](https://issues.apache.org/jira/browse/ZEPPELIN-2619)). So when you upgrading zeppelin to 0.9, you need to upgrade note files. Here's steps you need to follow: diff --git a/docs/setup/security/shiro_authentication.md b/docs/setup/security/shiro_authentication.md index 0e3035f74f6..ed99cf813d9 100644 --- a/docs/setup/security/shiro_authentication.md +++ b/docs/setup/security/shiro_authentication.md @@ -99,8 +99,8 @@ group1 = * ``` ## Configure Realm (optional) -Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses [IniRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/text/IniRealm.html) (users and groups are configurable in `conf/shiro.ini` file under `[user]` and `[group]` section). You can also leverage Shiro Realms like [JndiLdapRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/ldap/JndiLdapRealm.html), [JdbcRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/jdbc/JdbcRealm.html) or create [our own](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/AuthorizingRealm.html). -To learn more about Apache Shiro Realm, please check [this documentation](http://shiro.apache.org/realm.html). +Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses **IniRealm** (users and groups are configurable in the `conf/shiro.ini` file under the `[user]` and `[group]` sections). You can also leverage Shiro Realms like **JndiLdapRealm**, **JdbcRealm**, or create your own by extending **AuthorizingRealm**. +To learn more about Apache Shiro Realm, please check [this documentation](https://shiro.apache.org/realm.html). We also provide community custom Realms. @@ -151,28 +151,29 @@ The other more flexible option is to use the LdapRealm.
It allows for mapping of [main] ldapRealm=org.apache.zeppelin.realm.LdapRealm -ldapRealm.contextFactory.authenticationMechanism=simple -ldapRealm.contextFactory.url=ldap://localhost:33389 -ldapRealm.userDnTemplate=uid={0},ou=people,dc=hadoop,dc=apache,dc=org +ldapRealm.contextFactory.authenticationMechanism = simple +ldapRealm.contextFactory.url = ldap://localhost:33389 +ldapRealm.userDnTemplate = uid={0},ou=people,dc=hadoop,dc=apache,dc=org # Ability to set ldap paging Size if needed default is 100 ldapRealm.pagingSize = 200 -ldapRealm.authorizationEnabled=true -ldapRealm.contextFactory.systemAuthenticationMechanism=simple -ldapRealm.searchBase=dc=hadoop,dc=apache,dc=org +ldapRealm.authorizationEnabled = true +ldapRealm.searchBase = dc=hadoop,dc=apache,dc=org ldapRealm.userSearchBase = dc=hadoop,dc=apache,dc=org ldapRealm.groupSearchBase = ou=groups,dc=hadoop,dc=apache,dc=org -ldapRealm.groupObjectClass=groupofnames +ldapRealm.groupObjectClass = groupofnames # Allow userSearchAttribute to be customized +# If userSearchAttributeName was configured, Zeppelin would use userObjectClass and userSearchAttributeName to search for an actual user DN +# Otherwise, memberAttributeValueTemplate would be used to construct the user DN. ldapRealm.userSearchAttributeName = sAMAccountName -ldapRealm.memberAttribute=member +ldapRealm.memberAttribute = member # force usernames returned from ldap to lowercase useful for AD ldapRealm.userLowerCase = true # ability set searchScopes subtree (default), one, base ldapRealm.userSearchScope = subtree; ldapRealm.groupSearchScope = subtree; -ldapRealm.memberAttributeValueTemplate=cn={0},ou=people,dc=hadoop,dc=apache,dc=org -ldapRealm.contextFactory.systemUsername=uid=guest,ou=people,dc=hadoop,dc=apache,dc=org -ldapRealm.contextFactory.systemPassword=S{ALIAS=ldcSystemPassword} +ldapRealm.memberAttributeValueTemplate = cn={0},ou=people,dc=hadoop,dc=apache,dc=org +ldapRealm.contextFactory.systemUsername = uid=guest,ou=people,dc=hadoop,dc=apache,dc=org +ldapRealm.contextFactory.systemPassword = S{ALIAS=ldcSystemPassword} # enable support for nested groups using the LDAP_MATCHING_RULE_IN_CHAIN operator ldapRealm.groupSearchEnableMatchingRuleInChain = true # optional mapping from physical groups to logical application roles @@ -180,7 +181,7 @@ ldapRealm.rolesByGroup = LDN_USERS: user_role, NYK_USERS: user_role, HKG_USERS: # optional list of roles that are allowed to authenticate. Incase not present all groups are allowed to authenticate (login). # This changes nothing for url specific permissions that will continue to work as specified in [urls]. ldapRealm.allowedRolesForAuthentication = admin_role,user_role -ldapRealm.permissionsByRole= user_role = *:ToDoItemsJdo:*:*, *:ToDoItem:*:*; admin_role = * +ldapRealm.permissionsByRole = user_role = *:ToDoItemsJdo:*:*, *:ToDoItem:*:*; admin_role = * securityManager.sessionManager = $sessionManager securityManager.realms = $ldapRealm ``` @@ -199,8 +200,8 @@ ldapRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/conf/zeppeli ### PAM [PAM](https://en.wikipedia.org/wiki/Pluggable_authentication_module) authentication support allows the reuse of existing authentication -moduls on the host where Zeppelin is running. On a typical system modules are configured per service for example sshd, passwd, etc. under `/etc/pam.d/`. You can -either reuse one of these services or create your own for Zeppelin. Activiting PAM authentication requires two parameters: +modules on the host where Zeppelin is running. 
On a typical system modules are configured per service for example sshd, passwd, etc. under `/etc/pam.d/`. You can +either reuse one of these services or create your own for Zeppelin. Activating PAM authentication requires two parameters: 1. realm: The Shiro realm being used 2. service: The service configured under `/etc/pam.d/` to be used. The name here needs to be the same as the file name under `/etc/pam.d/` @@ -210,24 +211,11 @@ either reuse one of these services or create your own for Zeppelin. Activiting P pamRealm.service=sshd ``` -### ZeppelinHub -[ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronize your Apache Zeppelin notebooks and enables you to collaborate easily. - -To enable login with your ZeppelinHub credential, apply the following change in `conf/shiro.ini` under `[main]` section. - -``` -### A sample for configuring ZeppelinHub Realm -zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm -## Url of ZeppelinHub -zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com -securityManager.realms = $zeppelinHubRealm -``` - -> Note: ZeppelinHub is not related to Apache Zeppelin project. - ### Knox SSO [KnoxSSO](https://knox.apache.org/books/knox-0-13-0/dev-guide.html#KnoxSSO+Integration) provides an abstraction for integrating any number of authentication systems and SSO solutions and enables participating web applications to scale to those solutions more easily. Without the token exchange capabilities offered by KnoxSSO each component UI would need to integrate with each desired solution on its own. +When Knox SSO is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will be used internally to determine the group memberships of the user who is trying to log in. Role-based access permission can be set based on groups as seen by Hadoop. + To enable this, apply the following change in `conf/shiro.ini` under `[main]` section. ``` @@ -249,7 +237,7 @@ authc = org.apache.zeppelin.realm.jwt.KnoxAuthenticationFilter ### HTTP SPNEGO Authentication HTTP SPNEGO (Simple and Protected GSS-API NEGOtiation) is the standard way to support Kerberos Ticket based user authentication for Web Services. Based on [Apache Hadoop Auth](https://hadoop.apache.org/docs/current/hadoop-auth/index.html), Zeppelin supports ability to authenticate users by accepting and validating their Kerberos Ticket. -When HTTP SPNEGO Authentication is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will used internally to determine group membership of user who is trying to log in. Role-based access permission can be set based on groups as seen by Hadoop. +When HTTP SPNEGO Authentication is enabled for Zeppelin, the [Apache Hadoop Groups Mapping](https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html) configuration will be used internally to determine the group memberships of the user who is trying to log in. Role-based access permission can be set based on groups as seen by Hadoop. To enable this, apply the following change in `conf/shiro.ini` under `[main]` section. @@ -266,7 +254,9 @@ authc = org.apache.zeppelin.realm.kerberos.KerberosAuthenticationFilter ``` For above configuration to work, user need to do some more configurations outside Zeppelin. -1).
A valid SPNEGO keytab should be available on the Zeppelin node and should be readable by 'zeppelin' user. If there is a SPNEGO keytab already available (because of other Hadoop service), it can be reused here and no need to generate a new keytab. An example of working SPNEGO keytab could be: +1. A valid SPNEGO keytab should be available on the Zeppelin node and should be readable by the 'zeppelin' user. If there is a SPNEGO keytab already available (because of another Hadoop service), it can be reused here without generating a new keytab. +An example of a working SPNEGO keytab could be: + ``` $ klist -kt /etc/security/keytabs/spnego.service.keytab Keytab name: FILE:/etc/security/keytabs/spnego.service.keytab @@ -277,16 +267,19 @@ KVNO Timestamp Principal 2 11/26/2018 16:58:38 HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM 2 11/26/2018 16:58:38 HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM ``` -and the keytab permission should be: (VERY IMPORTANT to not to set this to 777 or readable by all !!!): + +Ensure that the keytab permissions are sufficiently strict while still readable by the 'zeppelin' user: + ``` $ ls -l /etc/security/keytabs/spnego.service.keytab -r--r-----. 1 root hadoop 346 Nov 26 16:58 /etc/security/keytabs/spnego.service.keytab ``` -Above 'zeppelin' user happens to be member of 'hadoop' group. -2). A secret signature file must be present on Zeppelin node (readable to 'zeppelin' user). This file contains the random binary numbers which is used to sign 'hadoop.auth' cookie, generated during SPNEGO exchange. If such a file is already generated and available on the Zeppelin node, it should be used rather than generating a new file. +Note that for the above example, the 'zeppelin' user can read the keytab because they are a member of the 'hadoop' group. +2. A secret signature file must be present on the Zeppelin node, readable by the 'zeppelin' user. This file contains random binary data that is used to sign the 'hadoop.auth' cookie generated during the SPNEGO exchange. If such a file is already generated and available on the Zeppelin node, it should be used rather than generating a new file. Commands to generate a secret signature file (if required): + ``` dd if=/dev/urandom of=/etc/security/http_secret bs=1024 count=1 chown hdfs:hadoop /etc/security/http_secret diff --git a/docs/setup/storage/configuration_storage.md b/docs/setup/storage/configuration_storage.md new file mode 100644 index 00000000000..3a5bbff9dfb --- /dev/null +++ b/docs/setup/storage/configuration_storage.md @@ -0,0 +1,65 @@ +--- +layout: page +title: "Configuration Storage for Apache Zeppelin" +description: "Configuration Storage for Apache Zeppelin" +group: setup/storage +--- + +{% include JB/setup %} + +# Configuration Storage for Apache Zeppelin +
    + +## Overview +Zeppelin has lots of configuration which is stored in files: +- `interpreter.json` (This file contains all the interpreter setting info) +- `notebook-authorization.json` (This file contains all the note authorization info) +- `credential.json` (This file contains the credential info) + +## Configuration Storage in hadoop compatible file system + +Set following properties in `zeppelin-site.xml`: +```xml + + zeppelin.config.storage.class + org.apache.zeppelin.storage.FileSystemConfigStorage + configuration persistence layer implementation + + + zeppelin.config.fs.dir + + path on the hadoop compatible file system + +``` +Also specify `HADOOP_CONF_DIR` in `zeppelin-env.sh` so that Zeppelin can find the right hadoop configuration files. + +If your hadoop cluster is kerberized, then you need to specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` + + +## Configuration Storage in local file system +By default, zeppelin store configuration on local file system. +```xml + + zeppelin.config.storage.class + org.apache.zeppelin.storage.LocalConfigStorage + configuration persistence layer implementation + + + zeppelin.config.fs.dir + + path on local file system + +``` \ No newline at end of file diff --git a/docs/setup/storage/storage.md b/docs/setup/storage/notebook_storage.md similarity index 92% rename from docs/setup/storage/storage.md rename to docs/setup/storage/notebook_storage.md index f53fa6b8df4..e7a5b26ccc0 100644 --- a/docs/setup/storage/storage.md +++ b/docs/setup/storage/notebook_storage.md @@ -1,7 +1,7 @@ --- layout: page title: "Notebook Storage for Apache Zeppelin" -description: Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations." +description: "Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations." group: setup/storage --- - -``` - -or set the environment variable in the file **zeppelin-env.sh**: - -```bash -export ZEPPELIN_NOTEBOOK_STORAGE="org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo" -``` - -Secondly, you need to set the environment variables in the file **zeppelin-env.sh**: - -```bash -export ZEPPELINHUB_API_TOKEN=ZeppelinHub token -export ZEPPELINHUB_API_ADDRESS=address of ZeppelinHub service (e.g. https://www.zeppelinhub.com) -``` - -You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token). - - ## Notebook Storage in MongoDB Using `MongoNotebookRepo`, you can store your notebook in [MongoDB](https://www.mongodb.com/). diff --git a/docs/usage/display_system/basic.md b/docs/usage/display_system/basic.md index 5080fffcbca..f0a4dde9654 100644 --- a/docs/usage/display_system/basic.md +++ b/docs/usage/display_system/basic.md @@ -1,7 +1,7 @@ --- layout: page title: "Basic Display System in Apache Zeppelin" -description: "There are 3 basic display systems in Apache Zeppelin. By default, Zeppelin prints interpreter responce as a plain text using text display system. With %html directive, Zeppelin treats your output as HTML. You can also simply use %table display system to leverage Zeppelin's built in visualization." +description: "There are several display systems available in Apache Zeppelin. 
By default, Zeppelin prints interpreter response as a plain text using %text display system. However, display systems for showing HTML, tables, markdown or even graph visualizations are also available." group: usage/display_system --- + 1.3 @@ -81,6 +83,16 @@ ${unirest.version} + + org.json + json + + + + org.junit.jupiter + junit-jupiter-params + test + @@ -88,9 +100,6 @@ maven-enforcer-plugin - - maven-dependency-plugin - maven-resources-plugin @@ -100,9 +109,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - - false - diff --git a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java index 45b37c4ebc6..69f5b05679b 100644 --- a/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java +++ b/elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java @@ -22,6 +22,11 @@ import com.google.gson.JsonObject; import org.apache.commons.lang3.StringUtils; +import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient; +import org.apache.zeppelin.elasticsearch.client.ElasticsearchClientType; +import org.apache.zeppelin.elasticsearch.client.ElasticsearchClientTypeBuilder; +import org.apache.zeppelin.elasticsearch.client.HttpBasedClient; +import org.apache.zeppelin.elasticsearch.client.TransportBasedClient; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; @@ -55,19 +60,18 @@ import org.apache.zeppelin.elasticsearch.action.ActionResponse; import org.apache.zeppelin.elasticsearch.action.AggWrapper; import org.apache.zeppelin.elasticsearch.action.HitWrapper; -import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient; -import org.apache.zeppelin.elasticsearch.client.HttpBasedClient; -import org.apache.zeppelin.elasticsearch.client.TransportBasedClient; import org.apache.zeppelin.interpreter.Interpreter; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; +import static org.apache.zeppelin.elasticsearch.client.ElasticsearchClientType.TRANSPORT; + /** * Elasticsearch Interpreter for Zeppelin. */ public class ElasticsearchInterpreter extends Interpreter { - private static Logger logger = LoggerFactory.getLogger(ElasticsearchInterpreter.class); + private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchInterpreter.class); private static final String HELP = "Elasticsearch interpreter:\n" + "General format: ///