-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean_studio_xml.py
77 lines (58 loc) · 2.26 KB
/
clean_studio_xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import argparse
import tempfile
import shutil
import tarfile
import os.path
import helpers
# Command-line interface: one positional argument naming either a directory
# of Studio-dumped XML or a "tar.gz" archive of such a dump.
parser = argparse.ArgumentParser(description="Clean up XML spat out by Studio.")
parser.add_argument("base", help="Base directory of Studio-dumped XML")
args = parser.parse_args()

# True when the input is an archive that has to be unpacked first and
# re-packed (in place) once the cleanup is done.
TAR_FILE = args.base.endswith("tar.gz")

# Temporary extraction directory; stays None when working on a plain
# directory so the cleanup in the ``finally`` clause knows there is nothing
# to remove.
dirpath = None

try:
    if TAR_FILE:
        dirpath = tempfile.mkdtemp()
        with tarfile.open(args.base) as tar:
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive. Only run this on archives from a trusted source,
            # or validate the members before extracting.
            tar.extractall(dirpath)
        # The course content is read from the "course" directory of the
        # extracted archive (the same name is used as arcname when the
        # archive is rewritten below).
        basepath = os.path.join(dirpath, "course")
    else:
        basepath = args.base

    ## XML cleanup ##
    try:
        # get root of course XML tree and load the XML for the entire course
        tree = helpers.load_xml_course(basepath)

        # Save the slugs used in the course, so we don't run into
        # collisions while renaming
        helpers.save_url_name_slugs(tree)

        # Untested: Extract names from Youtube video titles, etc.
        # helpers.propagate_youtube_information(tree)

        ## Propagate names down from parents to children
        helpers.propagate_display_between_parent_and_child(tree)

        # Give URL names based on display names
        helpers.propagate_display_to_url_name(tree)

        ## We'll clean up the filenames Studio assigned for our HTML files
        helpers.propagate_urlname_to_filename(tree, basepath)

        ## Add discussion tags where relevant. Add display names to
        ## discussions.
        ##
        ## If we don't have a nice name, we'll assume the discussion is
        ## about the previous node in the tree.
        helpers.propagate_sibling_tags(tree)

        # We're done. Dump problems and course.xml back to the file system
        helpers.save_tree(basepath, tree)

        # And finally, dump the mapping file
        #
        # TODO: Merge line below
        #
        # if not os.path.exists(os.path.join(args.base, 'static')):
        #     os.mkdir(os.path.join(args.base, 'static'))
        helpers.save_url_name_map(basepath)
    except Exception:
        # Report which input failed, then let the original error propagate.
        # A single-argument print(...) emits the same text as the former
        # two-item print statement under Python 2 and also parses under
        # Python 3.
        print(" ".join(("Could not handle ", args.base)))
        raise

    # Now, we clean up a few JSON files.
    for filename in ['policies/edx/policy.json', 'policies/edx/grading_policy.json']:
        helpers.clean_json(basepath, filename)

    if TAR_FILE:
        # Overwrite the input archive with the cleaned-up course.
        with tarfile.open(args.base, "w:gz") as tar:
            tar.add(basepath, arcname='course')
finally:
    # Always remove the temporary extraction directory, including when one
    # of the steps above failed; previously it was only removed on success.
    if dirpath is not None:
        shutil.rmtree(dirpath)