From 71d286a997b041eca8e52aa0e2d25b440b2e224a Mon Sep 17 00:00:00 2001 From: Clinton Pinto Date: Thu, 11 Jun 2020 15:00:10 -0700 Subject: [PATCH] Add action files to root of project --- .../actions/action-gtranslate/gtranslate.py | 141 --------- .github/actions/action-gtranslate/lib.sh | 69 ----- .../action-gtranslate/translate-strings.sh | 17 -- .github/workflows/pull_request.yml | 17 -- .../Dockerfile => Dockerfile | 3 +- .../action.yml => action.yml | 0 gtranslate.py | 284 ++++++++++++++++++ lib.sh | 27 ++ translate-strings.sh | 12 + 9 files changed, 324 insertions(+), 246 deletions(-) delete mode 100644 .github/actions/action-gtranslate/gtranslate.py delete mode 100644 .github/actions/action-gtranslate/lib.sh delete mode 100755 .github/actions/action-gtranslate/translate-strings.sh delete mode 100644 .github/workflows/pull_request.yml rename .github/actions/action-gtranslate/Dockerfile => Dockerfile (80%) rename .github/actions/action-gtranslate/action.yml => action.yml (100%) create mode 100644 gtranslate.py create mode 100644 lib.sh create mode 100755 translate-strings.sh diff --git a/.github/actions/action-gtranslate/gtranslate.py b/.github/actions/action-gtranslate/gtranslate.py deleted file mode 100644 index 9241439..0000000 --- a/.github/actions/action-gtranslate/gtranslate.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# output format: values-XX folders with strings.xml inside - -# SUBROUTINES -# This subroutine reformats chinese strings to separate a 'd' or 's' character that is -# appended to the chinese character. -def parse_chinese_word(chinese_string): - str_length = len(chinese_string) - s = chinese_string - for i, v in enumerate(chinese_string): - if (v == 's') | (v == 'd') & (str_length > 1): - splitPos = i + 1 - l,r = chinese_string[:splitPos], chinese_string[splitPos:] - s = l+ " " +r - return s - -def reformat_chinese_string(chinese_string): - words = chinese_string.split(' ') - s = "" - for singleWord in words: - newWord = parse_chinese_word(singleWord) - s+="" +newWord - return s - -# Function which calls the google translate API and converts the string to_translate to the language specified in to_language -def translate(to_translate, to_language="auto"): - # send request - to_translate = unescape(to_translate) - to_translate = to_translate.replace('\\\'', '\'') - to_translate = to_translate.replace('&', '%26') - url = "https://translation.googleapis.com/language/translate/v2?source=en&target=%s&key=%s&q=%s"% (to_language, TRANSLATIONS_API_KEY, to_translate) - try: - response = requests.get(url) - response.raise_for_status() - except HTTPError as http_err: - print ("Error occurred") - return to_translate - - json_data = json.loads(response.text) - translated_text = json_data['data']['translations'][0]['translatedText'] - if 'zh' in to_language : - # There's an issue with placeholder strings i.e Strings with %1$s, %1$d, %d, %s getting - # misaligned in the response. This occurs only for chinese translations. Reformat the string - # in that case - translated_text = reformat_chinese_string(translated_text) - translated_text = translated_text.replace('%', '%') - - parsed2 = unescape(translated_text) - # fix parameter strings - parsed3 = re.sub('% ([ds])', r' %\1', parsed2) - parsed4 = re.sub('% ([\d]) \$ ([ds])', r' %\1$\2', parsed3).strip() - parsed5 = parsed4.replace('\'', '\\\'') - return parsed5 - -# function which handles nested xml tags like plural strings and string-arrays. Parses each -# child tag and calls the translate function. -def handle_nested_xml(root, i) : - for j in range(len(root[i])): - # for each translatable string call the translation subroutine - # and replace the string by its translation, - isTranslatable=root[i][j].get('translatable') - if(root[i][j].tag=='item') & (isTranslatable!='false'): - # trasnalte text and fix any possible issues traslotor creates: messing up HTML tags, adding spaces between string formatting elements - totranslate=root[i][j].text - if(totranslate!=None): - root[i][j].text=translate(totranslate,OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - - # if string was broken down due to HTML tags, reassemble it - if len(root[i][j]) != 0: - for element in range(len(root[i][j])): - root[i][j][element].text = " " + translate(root[i][j][element].text, OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - root[i][j][element].tail = " " + translate(root[i][j][element].tail, OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - -# MAIN PROGRAM -# import libraries -import requests -from requests.exceptions import HTTPError -from html import unescape -import os -import xml.etree.ElementTree as ET -import json -import re -import sys - -TRANSLATIONS_API_KEY = sys.argv[1] -OUTPUT_LANGUAGES = sys.argv[2] -OUTPUT_LANGUAGE_LIST = OUTPUT_LANGUAGES.split(',') -print (OUTPUT_LANGUAGE_LIST) -BASEPATH = "app/src/main/res/" -INFILE = BASEPATH + "values/" + "strings.xml" -INPUTLANGUAGE = "en" - -for OUTPUT_LANGUAGE in OUTPUT_LANGUAGE_LIST: - # create outfile in subfolder if doesn't already exist - name, ext=os.path.splitext(INFILE) - if not os.path.exists(BASEPATH + "values-" + OUTPUT_LANGUAGE): - os.mkdir(BASEPATH + "values-" + OUTPUT_LANGUAGE) - OUTFILE = BASEPATH + "values-" + OUTPUT_LANGUAGE + "/strings.xml" - - # read xml structure - tree = ET.parse(INFILE) - root = tree.getroot() - # Keeps track of xml elements marked as non-translatable - removal_list = [] - - # cycle through elements - for i in range(len(root)): - # for each translatable string call the translation subroutine - # and replace the string by its translation, - # descend into each string array - isTranslatable=root[i].get('translatable') - if(root[i].tag=='string') & (isTranslatable!='false'): - # trasnalte text and fix any possible issues traslotor creates: messing up HTML tags, adding spaces between string formatting elements - totranslate=root[i].text - if(totranslate!=None): - root[i].text=translate(totranslate,OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - - # if string was broken down due to HTML tags, reassemble it - if len(root[i]) != 0: - for element in range(len(root[i])): - root[i][element].text = " " + translate(root[i][element].text, OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - root[i][element].tail = " " + translate(root[i][element].tail, OUTPUT_LANGUAGE).replace('\\ ', '\\').replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') - - if(root[i].tag=='string-array'): - handle_nested_xml(root, i) - - if(root[i].tag=='plurals'): - handle_nested_xml(root, i) - - if isTranslatable == 'false': - # Add to the removal_list - removal_list.append(root[i]) - - # Remove elements marked as non-translatable - for element in removal_list: - root.remove(element) - - # write new xml file - tree.write(OUTFILE, encoding='utf-8') diff --git a/.github/actions/action-gtranslate/lib.sh b/.github/actions/action-gtranslate/lib.sh deleted file mode 100644 index 1128a2c..0000000 --- a/.github/actions/action-gtranslate/lib.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash - -# This file adapted from https://github.com/bltavares/actions/blob/master/prettier/lib.sh - -_is_automated_event() { - AUTOFIX_EVENTS=${AUTOFIX_EVENTS:-push} - - if [[ ${GITHUB_EVENT_NAME} =~ ^($AUTOFIX_EVENTS)$ ]]; then - return 0 - fi - - return 1 -} - -_requires_token() { - if [[ -z $GITHUB_TOKEN ]]; then - echo "Set the GITHUB_TOKEN env variable." - exit 1 - fi -} - -_git_is_dirty() { - [[ -n "$(git status -s)" ]] -} - -_commit_and_push() { - git add app/src/main/res/* - git commit -m "translator-bot: Added Translations" - git pull - git push -} - -_commit_if_needed() { - if _git_is_dirty; then - _commit_and_push - fi -} - -_should_translate() { - stringsFile="app/src/main/res/values/strings.xml" - for i in $(git diff ${GITHUB_HEAD_REF} ${GITHUB_BASE_REF} --name-only); do - if [ "$i" == "$stringsFile" ] - then - return 0 - fi - done - return 1 -} - -_remove_scripts() { - rm gtranslate.py translation-config.properties -} - -_setup_git() { - git config --global user.name "Translator-Bot" - git config --global user.email "leia-codacy-bot@leiainc.com" - - git checkout "${GITHUB_BASE_REF}" - git pull - git checkout "${GITHUB_HEAD_REF}" - git pull -} - -_copy_scripts() { - cd / - cp gtranslate.py "${GITHUB_WORKSPACE}" - cp translation-config.properties "${GITHUB_WORKSPACE}" - cd "${GITHUB_WORKSPACE}" -} diff --git a/.github/actions/action-gtranslate/translate-strings.sh b/.github/actions/action-gtranslate/translate-strings.sh deleted file mode 100755 index cc3a045..0000000 --- a/.github/actions/action-gtranslate/translate-strings.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# shellcheck disable=SC1091 -source /lib.sh - -_setup_git - -if _should_translate;then - _copy_scripts - python3 gtranslate.py ${INPUT_TRANSLATIONKEY} ${INPUT_OUTPUTLANGUAGES} - _remove_scripts - _requires_token - _commit_if_needed - fi - diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml deleted file mode 100644 index 8a94290..0000000 --- a/.github/workflows/pull_request.yml +++ /dev/null @@ -1,17 +0,0 @@ -on: pull_request -name: Translations Workflow -jobs: - formatCode: - name: Translate Strings - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: '0' - - name: translate strings - uses: ./.github/actions/action-gtranslate - with: - translationKey: 'AIzaSyDBzuLnStbM-GJv7ILmACyS_8l3kzz2ez4' - outputLanguages: 'es,fr,zh' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/actions/action-gtranslate/Dockerfile b/Dockerfile similarity index 80% rename from .github/actions/action-gtranslate/Dockerfile rename to Dockerfile index e51332e..578e50e 100644 --- a/.github/actions/action-gtranslate/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM alpine:3.10 LABEL "com.github.actions.name"="Strings translater" -LABEL "com.github.actions.description"="Formats Java code using Google Java Format" +LABEL "com.github.actions.description"="Translates strings using Google Translate API" LABEL "com.github.actions.icon"="mic" LABEL "com.github.actions.color"="green" @@ -23,5 +23,4 @@ RUN pip3 install requests COPY translate-strings.sh / COPY lib.sh / COPY gtranslate.py / -COPY translation-config.properties / ENTRYPOINT ["/translate-strings.sh"] diff --git a/.github/actions/action-gtranslate/action.yml b/action.yml similarity index 100% rename from .github/actions/action-gtranslate/action.yml rename to action.yml diff --git a/gtranslate.py b/gtranslate.py new file mode 100644 index 0000000..0e935f1 --- /dev/null +++ b/gtranslate.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# output format: values-XX folders with strings.xml inside + + +def parse_chinese_word(chinese_string): + str_length = len(chinese_string) + formatted_string = chinese_string + for i, v in enumerate(chinese_string): + if (v == 's') | (v == 'd') & (str_length > 1): + split_pos = i + 1 + l, r = chinese_string[:split_pos], chinese_string[split_pos:] + formatted_string = l + " " + r + return formatted_string + + +def reformat_chinese_string(chinese_string): + words = chinese_string.split(' ') + formatted_string = "" + for singleWord in words: + new_word = parse_chinese_word(singleWord) + formatted_string += "" + new_word + return formatted_string + + +def query_translations_api(text_to_translate, to_language): + + params = {'source': 'en', 'target': to_language, 'key': TRANSLATIONS_API_KEY, 'q': text_to_translate } + # Edge case where Google doesn't translate correctly if there is a period immediately before \n. Add a space + + url = "https://translation.googleapis.com/language/translate/v2?%s" % (urllib.parse.urlencode(params)) + try: + response = requests.get(url) + response.raise_for_status() + except HTTPError as http_err: + status_code = http_err.response.status_code + print("Http Error occurred - Error Status Code : " + status_code) + exit(1) + + json_data = json.loads(response.text) + translated_text = json_data['data']['translations'][0]['translatedText'] + translated_text = translated_text.replace('0x0A', '\\n') + return translated_text + + +# Function which calls the google translate API and converts the string to_ +# translate to the language specified in to_language +def translate(text_to_translate, to_language="auto"): + # Workaround for issue with google translate translating \n to \norde + text_to_translate = text_to_translate.replace('.\\n', '. \\n') + text_to_translate = text_to_translate.replace('\\n', '0x0A') + print('Translating to ' + to_language + ' - ' + text_to_translate) + translated_text = query_translations_api(text_to_translate, to_language) + + if 'zh' in to_language: + # There's an issue with placeholder strings i.e Strings with %1$s, %1$d, %d, %s getting + # misaligned in the response. This occurs only for chinese translations. Reformat the string + # in that case + translated_text = reformat_chinese_string(translated_text) + translated_text = translated_text.replace('%', '%') + + parsed2 = unescape(translated_text) + # fix parameter strings + parsed3 = re.sub('% ([ds])', r' %\1', parsed2) + parsed4 = re.sub('% ([\d]) \$ ([ds])', r' %\1$\2', parsed3).strip() + parsed5 = parsed4.replace('\'', '\\\'') + parsed6 = parsed5.replace('0x0A', '\\n') + return parsed6 + + +# Handles parsing a single xml tag. ( for regular string xml or for plural, string-arrays) +# and calling the translate function. Translates the xml only if +# 1) translated strings.xml doesn't exist +# 2) translated element doesn't exist in the translated strings.xml file +# 3) Existing translated element has been changed since the last translation. +# +# If one of the above conditions are satisfied, the translate function is called to translate the english +# string and save the hash of the existing english text in the 'translated-from' attribute for that +# xml. +# +# If conditions are not satisfied, it simply copies over the existing translated element from the existing +# translated strings.xml into the current element. + +# Also handles, single xml elements that are further broken down by Element tree because of +# html tags used within the xml text. +def handle_single_xml_element_translation(existing_xml_element, single_xml_element): + + if (len(single_xml_element) == 0) & (single_xml_element.text is not None): + # Simple xml with text + text_to_translate = single_xml_element.text + existing_text_hashcode = "" + manual_translation_exists = False + if existing_xml_element is not None: + # Get hashcode of previously translated english string if exists + existing_text_hashcode = existing_xml_element.get('translated-from') + manual_translation_exists = existing_text_hashcode is None + + if should_translate(existing_text_hashcode, text_to_translate, manual_translation_exists): + # Contents are not same. Translate the text in the xml + single_xml_element.text = translate(text_to_translate, output_language).replace('\\ ', '\\') \ + .replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') + single_xml_element.set('translated-from', encode(text_to_translate)) + else: + # Contents are same. Copy over the existing translated xml text into the current xml + single_xml_element.text = existing_xml_element.text + if not manual_translation_exists: + single_xml_element.set('translated-from', existing_xml_element.get('translated-from')) + + elif len(single_xml_element) > 0: + # XML was broken down due to nested html tags in the text. Reassemble the text by removing + # the html tags. + nested_text = get_nested_xml_text(single_xml_element) + existing_text_hashcode = "" + manual_translation_exists = False + if existing_xml_element is not None: + # Get hashcode of previously translated english string if exists + existing_text_hashcode = existing_xml_element.get('translated-from') + manual_translation_exists = existing_text_hashcode is None + + if should_translate(existing_text_hashcode, nested_text, manual_translation_exists): + # String was changed. Extract the string from html, translate it and reassemble. + reassembled_string = "" + if single_xml_element.text is not None: + reassembled_string += single_xml_element.text + single_xml_element.text = translate(single_xml_element.text, output_language) + # if string was broken down due to HTML tags, reassemble it + for child_element in single_xml_element: + if child_element.text is not None: + reassembled_string += child_element.text + # print('Text is ' + child_element.text) + child_element.text = " " + translate(child_element.text, output_language).replace('\\ ', '\\') \ + .replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') + + if child_element.tail is not None: + # print('Tail is ' + child_element.tail) + reassembled_string += child_element.tail + child_element.tail = " " + translate(child_element.tail, output_language).replace('\\ ', '\\') \ + .replace('\\ n ', '\\n').replace('\\n ', '\\n').replace('/ ', '/') + + # print('Complete text is ' + s) + # Use the reassembled english string to encode the hash and save it in the 'translated-from' attribute + single_xml_element.set('translated-from', encode(reassembled_string)) + else: + # Contents of existing xml element is the same. Simply copy it over with the nested html. + single_xml_element.text = existing_xml_element.text + for child_element, existing_child_element in zip(single_xml_element, existing_xml_element): + child_element.text = existing_child_element.text + child_element.tail = existing_child_element.tail + + if not manual_translation_exists: + single_xml_element.set('translated-from', existing_xml_element.get('translated-from')) + + +# Returns hash of a string +def encode(text): + result = hashlib.md5(text.encode()) + return result.hexdigest() + + +# Finds and returns xml element if exists in the given root. +def get_existing_xml(existing_root, element_to_find): + if existing_root is None: + return None + xml_search_query = ".//%s[@name='%s']" % (element_to_find.tag, element_to_find.get('name')) + return existing_root.find(xml_search_query) + + +# Assembles text that was broken down in an xml element due to html tags +# present within the text. +def get_nested_xml_text(nested_xml_element): + s = "" + if (nested_xml_element is not None) & (nested_xml_element.text is not None): + s += nested_xml_element.text + for child_xml in nested_xml_element: + if child_xml.text is not None: + s += child_xml.text + if child_xml.tail is not None: + s += child_xml.tail + return s + + +# Returns true if the hashcode of text_to_translate is the same as +# the existing_xml_hashcode +def should_translate(existing_xml_hashcode, text_to_translate, translation_exists): + if translation_exists: + return False + if (existing_xml_hashcode is not None) & (text_to_translate is not None): + current_text_hashcode = encode(text_to_translate) + # print(existing_element_text) + # print(current_text_hashcode) + if existing_xml_hashcode == current_text_hashcode: + return False + return True + + +# MAIN PROGRAM +if __name__ == '__main__': + + # import libraries + import requests + from requests.exceptions import HTTPError + from html import unescape + import os + import xml.etree.ElementTree as ET + import json + import re + import hashlib + import argparse + import urllib.parse + + parser = argparse.ArgumentParser() + parser.add_argument('api', action='store', type=str, help='The API key for Translate API') + parser.add_argument('output', action='store', type=str, + help='Output languages separated by comma. For eg. ' + 'To translate to spanish and french, use \'es,fr\'') + parser.add_argument('path', action='store', type=str, help='The path to the github workspace') + args = parser.parse_args() + + TRANSLATIONS_API_KEY = args.api + OUTPUT_LANGUAGES = args.output + WORKSPACE_PATH = args.path + + OUTPUT_LANGUAGE_LIST = OUTPUT_LANGUAGES.split(',') + print (OUTPUT_LANGUAGE_LIST) + + BASE_PATH = WORKSPACE_PATH + "/app/src/main/res/" + INFILE = BASE_PATH + "values/" + "strings.xml" + + for output_language in OUTPUT_LANGUAGE_LIST: + # create outfile in subfolder if doesn't already exist + name, ext = os.path.splitext(INFILE) + if not os.path.exists(BASE_PATH + "values-" + output_language): + os.mkdir(BASE_PATH + "values-" + output_language) + OUTFILE = BASE_PATH + "values-" + output_language + "/strings.xml" + + existing_translated_tree = None + existing_translated_root = None + + if os.path.exists(OUTFILE): + # If translated strings.xml exists, parse it and get the root. + existing_translated_tree = ET.parse(OUTFILE) + existing_translated_root = existing_translated_tree.getroot() + + # read xml structure + english_tree = ET.parse(INFILE) + english_root = english_tree.getroot() + removal_list = [] + + # cycle through xml elements in english strings.xml + for xml_element in english_root.iter(): + + is_translatable = xml_element.get('translatable') + + if is_translatable == 'false': + # is_translatable attribute is set to false. Simply continue. + removal_list.append(xml_element) + continue + + # Beyond this point, all strings are translatable. + + if xml_element.tag == 'string': + # XML element is of type + existing_translated_xml_element = get_existing_xml(existing_translated_root, xml_element) + handle_single_xml_element_translation(existing_translated_xml_element, xml_element) + + elif (xml_element.tag == 'string-array') | (xml_element.tag == 'plurals'): + # XML element is of type or + existing_translated_xml_element = get_existing_xml(existing_translated_root, xml_element) + if (existing_translated_xml_element is not None) \ + & (len(existing_translated_xml_element) == len(xml_element)): + # This xml element exists in translated file, cycle through both, and translate + for existing_item_element, item_element in zip(existing_translated_xml_element, xml_element): + handle_single_xml_element_translation(existing_item_element, item_element) + else: + # This xml element doesn't exist, simply translate. + for item_element in xml_element: + handle_single_xml_element_translation(None, item_element) + + for element in removal_list: + english_root.remove(element) + + # write the translated tree to the output file. + english_tree.write(OUTFILE, encoding="utf-8") diff --git a/lib.sh b/lib.sh new file mode 100644 index 0000000..82d1803 --- /dev/null +++ b/lib.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +_git_is_dirty() { + [[ -n "$(git status -s)" ]] +} + +_commit_and_push() { + git add app/src/main/res/* + git commit -m "translator-bot: Added Translations" + git push +} + +_commit_if_needed() { + if _git_is_dirty; then + _commit_and_push + fi +} + +_setup_git() { + git config --global user.name "Translator-Bot" + git config --global user.email "leia-codacy-bot@leiainc.com" + + git checkout "${GITHUB_BASE_REF}" + git pull + git checkout "${GITHUB_HEAD_REF}" + git pull +} diff --git a/translate-strings.sh b/translate-strings.sh new file mode 100755 index 0000000..8160c49 --- /dev/null +++ b/translate-strings.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -e + +# shellcheck disable=SC1091 +source /lib.sh + +_setup_git +python3 /gtranslate.py ${INPUT_TRANSLATIONKEY} ${INPUT_OUTPUTLANGUAGES} ${GITHUB_WORKSPACE} +_commit_if_needed + +