From cffd18a887b10ec9c24e55a95fc8507b890606a5 Mon Sep 17 00:00:00 2001 From: LizBaldo Date: Wed, 28 Aug 2024 10:10:49 -0400 Subject: [PATCH] [IA-4760] new GHA to replace last jenkins job (#493) * new GHA to replace jenkins job * also do a manual workflow trigger for debugging purposes later --- .github/workflows/publish_all.yml | 42 ++++ build_all.sh | 25 ++ hub/Dockerfile | 24 -- jenkins/Jenkinsfile | 385 ------------------------------ jenkins/README.md | 6 - scripts/generate_version_docs.py | 6 +- 6 files changed, 70 insertions(+), 418 deletions(-) create mode 100644 .github/workflows/publish_all.yml create mode 100644 build_all.sh delete mode 100644 hub/Dockerfile delete mode 100644 jenkins/Jenkinsfile delete mode 100644 jenkins/README.md diff --git a/.github/workflows/publish_all.yml b/.github/workflows/publish_all.yml new file mode 100644 index 00000000..8d71f067 --- /dev/null +++ b/.github/workflows/publish_all.yml @@ -0,0 +1,42 @@ +name: Build and publish all images to GCR + +on: + push: + branches: [ master ] + workflow_dispatch: + inputs: + branch: + description: 'Branch to run the workflow on' + required: false + default: 'master' + +jobs: + publish-job: + runs-on: self-hosted + steps: + - name: Checkout current code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - id: auth + uses: google-github-actions/auth@v1 + with: + credentials_json: ${{ secrets.TD_GCP_SA_KEY }} + create_credentials_file: true + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v0.3.0 + with: + project_id: ${{ secrets.GCP_PROJECT_ID }} + + - name: Explicitly auth Docker for GCR + run: gcloud auth configure-docker --quiet + + - name: Build Docker image and publish + run: | + gcloud auth configure-docker + ./build_all.sh diff --git a/build_all.sh b/build_all.sh new file mode 100644 index 00000000..db22942d --- /dev/null +++ b/build_all.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Run `gcloud auth configure-docker --quiet` before running this script +# Example: ./build_all.sh + +# Create the ordered list of images to build +# 1- terra-jupyter-base +# 2- terra-jupyter-python +# 3- terra-jupyter-r +# 4- terra-jupyter-gatk +# 5- terra-jupyter-hail +# 6- terra-jupyter-aou +# 7- terra-jupyter-bioconductor +# 8- terra-rstudio-aou +# 9- wondershaper +images=(terra-jupyter-base terra-jupyter-python terra-jupyter-r terra-jupyter-gatk terra-jupyter-hail terra-jupyter-aou terra-jupyter-bioconductor terra-rstudio-aou wondershaper) + +# Loop over each image to build in the correct order +for image in images; do + # Call build.sh with the image to build + echo 'Building and publishing the following image to GCR: $image' + ./build.sh "$image" "true" +done + +# Once all images have been built, generate and push the 'terra-docker-versions-new' doc +python scripts/generate_version_docs.py \ No newline at end of file diff --git a/hub/Dockerfile b/hub/Dockerfile deleted file mode 100644 index a0fa5e06..00000000 --- a/hub/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM ubuntu:14.04 - -RUN apt-get -y update - -RUN apt-get -y install software-properties-common - -RUN add-apt-repository -y ppa:cpick/hub - -RUN apt-get -y update - -RUN apt-get -y install hub - -RUN git config --system core.logallrefupdates false - -RUN mkdir -p /app/leo -WORKDIR /app/leo - -RUN useradd -m dsde-jenkins -USER dsde-jenkins - -RUN mkdir /home/dsde-jenkins/.config - -CMD ["/bin/bash"] - diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile deleted file mode 100644 index f9b8d216..00000000 --- a/jenkins/Jenkinsfile +++ /dev/null @@ -1,385 +0,0 @@ -//Builds the images in the terra-docker repo. - -// Currently, this is referenced in a jenkins job with the following configuration: -// Create a 'New Item' that is a Pipeline in Jenkins -// Specify Pipeline script from SCM with the scm for terra-docker and the appropriate branch -// Script Path should be the location of this file relative to the repo root - -import hudson.model.* -import hudson.FilePath -import jenkins.model.Jenkins - -//branch name we will create in leo. Will be appended with the build number. -def branchName = "automated-leonardo-hash-update-" -//lack of def makes it global -builtImagesFile = "built_images.txt" - -//used to exit early if no changes are detected -def shouldAbortBuild = false - -def LinkedHashSet imagesToBuild = new LinkedHashSet() - -def prLink = "" - -//declared in outer scope so it persists between steps -//omitting the 'def' is what makes it global -LinkedHashMap versionMap = [:] - -def String getChangeDescription(LinkedHashMap versionMap) { - def title = "[jenkins-generated-pr] Automated terra-docker image hash update" - def body = "$title\n These are the images that were built on this run" - def version = "" - - def String[] builtImages = sh( - script: "cat $builtImagesFile", - returnStdout: true - ).trim().split("\n") - - println("built images in getChangeDescription: " + versionMap.toString()) - - for (String image in builtImages) { - version = versionMap[image] - if (version == null) { - continue - } - println("updating body with $image: $version") - body = body + "\n- $image: $version" - } - - return body -} - -def LinkedHashSet getSortedChangedImages() { - def String[] images = sh( - script: "cat config/conf.json | jq -r '.image_data | .[] | select(.automated_flags.build == true) | .name'", - returnStdout: true - ).trim().split("\n") - - println("Images to build from config: $images") - - def LinkedHashSet filteredImages = new LinkedHashSet(); - //we must use a c-style loop due to jenkins serialization, aka https://www.devopscat.tech/2018/10/what-is-noncps-in-jenkins-pipeline/ - for (String currImage in images) { - if (hasImageChanged(currImage)) { - println("detected change for $currImage") - filteredImages.add(currImage); - } - } - - println("Filtered images: " + filteredImages.toString()); - - return getSortedSet(filteredImages); -} - -def Boolean hasImageChanged(String dir) { - def String output = sh ( - script: "git diff HEAD..HEAD~1 $dir", - returnStdout: true - ).trim() - - return !output.isEmpty() -} - -//we must sort using this unconventional approach until this issue is resolved: https://issues.jenkins-ci.org/browse/JENKINS-44924 - //Ensures the base images which may be used to build other images are built first. - //The order they will be sorted is terra-jupyter-base -> terra-jupyter-python -> terra-jupyter-r -> [...tail] - //If any of those images aren't present, the other will move up in the order -def LinkedHashSet getSortedSet(Collection imageSet) { - def sortedSet = new LinkedHashSet(); - def ArrayList priorityOrder = ["terra-jupyter-base", "terra-jupyter-python", "terra-jupyter-r"] - - for (String priorityImage in priorityOrder) { - if (imageSet.contains(priorityImage)) { - sortedSet.add(priorityImage) - imageSet.remove(priorityImage) - } - } - - sortedSet.addAll(imageSet) - - return sortedSet; -} - -def void buildAll(LinkedHashSet imageDirs) { - sh "touch $builtImagesFile" - for (String imageDir in imageDirs) { - buildImage(imageDir) - } -} - -def void buildImage(String imageDir) { - def workingDir = pwd() - sh "echo 'building the following image: $imageDir'" - def exitCode = sh( - script: "$workingDir/build.sh $imageDir 2>&1", - returnStatus: true - ) - - //if we cannot build an image because it will cause an overwrite of the remote gcr image, we set the build to unstable. - //We do this because the most likely scenario is it was desired that the image builds, but the VERSION file was not updated. - //we do not want to fail in this circumstance, as there are other images to build - if (exitCode == 14) { - println("The build script reported that building would result in overwritting an existing image. Setting build status to UNSTABLE. Consider updating the VERSION file for $imageDir") - currentBuild.setResult("UNSTABLE") - prevDesc = currentBuild.getDescription() == null ? "" : currentBuild.getDescription() + "
" - currentBuild.setDescription(prevDesc + "The build for image $imageDir failed because the image already exists.
") - } else if (exitCode != 0) { - println("Error detected building image $imageDir") - throw new RuntimeException("Unexpected error recieved from build.sh: \n\tCode: $exitCode") - } else { - sh "echo '$imageDir' >> $builtImagesFile" - println("Successfully built image $imageDir") - } - //we do nothing in the else case, the shell command exitted as expected -} - -def LinkedHashMap getVersionMap(LinkedHashSet imageDirs) { - def LinkedHashMap versionMap = [:] - - for (String currImageDir in imageDirs) { - def workingDir = pwd() - def version = sh( - script: String.format("cat config/conf.json | jq -r '.image_data | .[] | select(.name == \"%s\") | .version'", currImageDir), - returnStdout: true - ).trim() - println("Obtained version for $currImageDir: $version") - versionMap[currImageDir] = version - } - - versionMap -} - -//versionMap must be defined by a previous step for this to work -def void updateConfigHashes(replacementMap) { - def String[] configPaths = readFile("jenkins/confLocations.txt").trim().split("\n") - for (String currConfig in configPaths) { - println("Calling replaceTag function for $currConfig with $replacementMap") - replaceTags(currConfig, replacementMap) - } -} - -//parameterize with ":" prefix and end quote suffix instead of hardcode? -//an example of a line it would update is: `IMAGE_DIR:TAG"`. - //In this example, IMAGE_DIR would be the key in the map, and TAG would be whats replaced by the value. - //This function assumes that the line will end with a quote as well, and manually adds the quote so as not to assume a particular length for TAG -def void replaceTags(String filePath, LinkedHashMap replacementMap) { - def contents = readFile(filePath).split('\n') - - println("contents of $filePath before modification is $contents") - - def currLine = "" - for (String currReplacement in replacementMap.keySet()) { - for (int j = 0; j < contents.size(); j++) { - currLine = contents[j] - if (currLine.contains(currReplacement)) { - location = currLine.indexOf(currReplacement) - //find the image name from imageDirs.txt in the config, and substring it so we preserve everything up to the hash - //we add 1 because the image name is suffixed with a ':' before the hash that we wish to replace - //note we must also close the quote with this approach - def String newLine = currLine.substring(0, location + currReplacement.length() + 1) + replacementMap[currReplacement] + '"' - println("replacing line $currLine with $newLine") - contents[j] = newLine - } - } - } - - println("contents of $filePath after modification is $contents") - def joinedContents = contents.join('\n') - def workingDir = pwd() - - fp = createFilePath("$workingDir/$filePath") - //we add a trailing newline to the file as its lost in the split/join process - fp.write(joinedContents + "\n", "UTF-8") -} - -//Filepath is an object that abstracts away the fact the file can be on a remote jenkins node or the master jenkins node -def FilePath createFilePath(String path) { - if (env['NODE_NAME'] == null) { - throw new RuntimeException("envvar NODE_NAME is not set, probably not inside a node {} or running a different version of Jenkins!"); - } else if (env['NODE_NAME'].equals("master")) { - return new FilePath(path); - } else { - println("detecting remote file in createFilePath") - return new FilePath(Jenkins.getInstance().getComputer(env['NODE_NAME']).getChannel(), path); - } -} - -def abortBuild(reason) { - currentBuild.setResult('ABORTED') - prevDesc = currentBuild.getDescription() == null ? "" : currentBuild.getDescription() + "
" - currentBuild.setDescription(prevDesc + "$reason. Aborting the build.") -} - -pipeline { - //fixes this job to a specific node since some slaves have older versions of docker that cause problems - agent { label 'node212' } - - options { - //we don't want to be building more than one PR's changes at once, we will queue them instead - disableConcurrentBuilds() - } - - triggers { - // trigger build on pr hooks and not poll is what empty string does here, of course - pollSCM('') - } - - parameters { - booleanParam(name: "BUILD_SPECIFIED_IMAGES", defaultValue: false, - description: "Specify this, along with the images below, to force the build of specific images. Can be used in conjunction with the branch parameter for testing. Note this will fail if an image for the version in the VERSION file exists in google.") - string(name: "BRANCH", defaultValue: "master", - description: "Specify along with useCustomImageIdentifier to build the image with a specific name") - text(name: 'IMAGES_TO_BUILD', defaultValue: 'terra-jupyter-base\nterra-jupyter-python\nterra-jupyter-r\nterra-jupyter-gatk\nterra-jupyter-bioconductor\nterra-jupyter-hail\nterra-jupyter-aou\nterra-rstudio-aou', - description: 'The is a text field for newline separate images to build. Each image should be specified via the directory it resides in. The default value is a working example.') - } - - stages { - - stage('Terra Docker Git') { - steps { - git credentialsId: 'jenkins-ssh-github', url: 'git@github.com:DataBiosphere/terra-docker.git', branch: "$BRANCH" - } - } - - stage('Docker Auth') { - steps { - sh """ - sudo chown -R jenkins:jenkins /home/jenkins/.config - docker run --rm -v /etc/vault-token-dsde:/root/.vault-token:ro broadinstitute/dsde-toolbox:latest vault read --format=json secret/dsde/firecloud/common/image-build-account.json | jq .data > image-build-account.json - gcloud auth activate-service-account --key-file=image-build-account.json - """ - } - } - - stage('Get Images') { - steps { - script { - if (BUILD_SPECIFIED_IMAGES.equals("false")) { - imagesToBuild = getSortedChangedImages() - } else { - //the cast is unecessary, but makes it explicit that this will eliminate duplicates in user-input - imagesToBuild = IMAGES_TO_BUILD.trim().split('\n') as LinkedHashSet - } - - println("images that will be built: " + imagesToBuild.toString()) - - if (imagesToBuild.isEmpty()) { - abortBuild("No image changes detected") - shouldAbortBuild = true - return - } - } - } - } - - //filter image directories based on whether there are changes, and then build all - stage('Build Images') { - steps { - script { - buildAll(imagesToBuild) - - sh "python scripts/generate_version_docs.py" - //this is maintained as state between stages - versionMap = getVersionMap(imagesToBuild) - } - } - } - - //we are done with the terra-docker repo, clone the leo repo - stage('Leonardo Git') { - //run this step if the expression is true - when { expression { !shouldAbortBuild } } - - steps { - git credentialsId: 'jenkins-ssh-github', url: 'git@github.com:DataBiosphere/leonardo.git', branch: 'develop' - } - } - - //update the built image hashes in the leo repo - stage('Update Leonardo Hashes') { - when { expression { !shouldAbortBuild } } - - steps { - script { - //even thought versionMap is global, we must pass it to the function here for some reason - updateConfigHashes(versionMap) - - def changesMade = sh( - script: "git diff", - returnStdout: true - ) - - println("Changes made: " + changesMade) - - sh "git diff" - sh "git remote -v" - - //needed, because if a VERSION is not updated for all imageDirs with changes, the below git actions will cause the build to fail - if (changesMade.isEmpty()) { - abortBuild("No git changes detected") - shouldAbortBuild = true - return - } - - sshagent(['jenkins-ssh-github']) { - sh """ - git config --global user.name "Jenkins Role account" - git config --global user.email jenkins@dsp-fc-jenkins-slave-prod212.c.broad-dsp-techops.internal - - git checkout -b $branchName${BUILD_NUMBER} - git commit -am "automated config hash updates" - git push origin $branchName${BUILD_NUMBER} - """ - } - } - - } - } - - stage('Create PR') { - when { expression { !shouldAbortBuild } } - - steps { - withCredentials([file(credentialsId: 'hub-token', variable: 'token')]) { - sh """ - docker pull us.gcr.io/broad-dsp-gcr-public/hub:1 - docker rm -f hub-runtime | true - - docker run -itd --name=hub-runtime -u root -v ${WORKSPACE}:/app/leo us.gcr.io/broad-dsp-gcr-public/hub:1 - docker cp \$token hub-runtime:/home/dsde-jenkins/.config/hub - docker exec hub-runtime sudo chmod 777 /home/dsde-jenkins/.config/hub - """ - } - - script { - def prBody = getChangeDescription(versionMap) - prLink = sh( - script: "docker exec --user dsde-jenkins hub-runtime hub pull-request -b develop -m \"$prBody\"", - returnStdout: true - ).trim() - } - - sh """ - docker stop hub-runtime - docker rm -f hub-runtime | true - """ - } - } - } - - post { - always { - archiveArtifacts artifacts: "$builtImagesFile", allowEmptyArchive: true - cleanWs() - } - success { - slackSend(channel: '#dsp-callisto-internal', message: "Jenkins has successfully built images and created a PR with the updated terra-docker hashes.\n" + - "\nIf you would like to check the status of the PR, click: $prLink") - } - // failure { - // slackSend(channel: '#dsp-callisto-internal', message: "Jenkins has failed to on a build of the terra-docker repo. Some of the images may have been successfully built. See the log for details: $BUILD_URL") - // } - } - - -} diff --git a/jenkins/README.md b/jenkins/README.md deleted file mode 100644 index eff5965e..00000000 --- a/jenkins/README.md +++ /dev/null @@ -1,6 +0,0 @@ -There is a jenkins job titled leonardo-build-terra-docker that is sourced off the Jenkinsfile in this repository. -It is used to detect a change in any of the folders listed in imageDirs.txt, build the image with build.sh, and update the relevant hashes in the leonardo repo. -Only images following the names specified in imageDirs.txt will be updated in leonardo. - - -There is a file in the leonardo repo's jenkins folder called 'confLocations.txt' which is also relied on by this job, which specifies the files that require updating. diff --git a/scripts/generate_version_docs.py b/scripts/generate_version_docs.py index 3bf57ac4..da95f900 100755 --- a/scripts/generate_version_docs.py +++ b/scripts/generate_version_docs.py @@ -27,7 +27,7 @@ def generate_docs(): remote_versions_list = list(map(lambda image_doc: {image_doc["id"]: image_doc["version"]}, remote_docs)) remote_versions = utils.flatten_list_of_dicts(remote_versions_list) - print "current versions detected: " + str(remote_versions) + print("current versions detected: " + str(remote_versions)) legacy_gatk_doc = filter(lambda remote_doc: remote_doc["id"] == "terra-jupyter-gatk_legacy", remote_docs)[0] legacy_bioconductor_doc = utils.read_json_file(static_config_location)[0] # hard coding this until next bioconductor release (~06/2022) @@ -38,7 +38,7 @@ def generate_docs(): remote_doc = list(filter(lambda image_doc: image_doc["id"] == image_config["name"], remote_docs))[0] if image_config["name"] in remote_versions and image_config["version"] == remote_versions[image_config["name"]]: - print "using remote doc: {}".format(remote_doc) + print("using remote doc: {}".format(remote_doc)) doc = remote_doc else: doc = generate_doc_for_image(image_config) @@ -58,7 +58,7 @@ def generate_docs(): return docs def get_legacy_image(new_version, remote_doc, current_legacy_img): - print "generating legacy image" + print("generating legacy image") new_version = new_version.split(".") new_version_patch = int(new_version[2]) new_version_minor = int(new_version[1])