Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create and use a standard utility library for handling zip files in the frontend #11539

Merged
merged 9 commits into from
Jan 2, 2024
37 changes: 3 additions & 34 deletions packages/hashi/src/H5P/H5PRunner.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,6 @@ function contentIdentifier(contentId) {
return `cid-${contentId}`;
}

// Matches every CSS url(...) reference, with or without quotes around the URL.
// Capture groups: (1) the opening `url(` plus optional quote, (2) the URL itself,
// (3) the optional closing quote plus `)`.
const cssPathRegex = /(url\(['"]?)([^"')]+)?(['"]?\))/g;

/**
 * Rewrite the url() references inside one CSS file of a package so that they
 * point at the replacement URLs stored for the referenced files.
 *
 * @param {string} dep - Path of the CSS file; also its key in packageFiles.
 * @param {Object} packageFiles - Map of file path to file contents (for the CSS
 *   file itself) or to the URL that should replace references to that path.
 * @return {string} The CSS with every resolvable reference replaced; references
 *   that have no entry in packageFiles are left exactly as they were.
 */
export function replacePaths(dep, packageFiles) {
  // Resolve a reference relative to the CSS file. The dummy origin only exists so
  // that the URL constructor can do the relative path arithmetic for us; the leading
  // slash of the pathname is dropped to match the keys used in packageFiles.
  const resolveFromDep = reference =>
    new URL(reference, new URL(dep, 'http://b.b/')).pathname.substring(1);

  const rewriteReference = (wholeMatch, opening, reference, closing) => {
    let replacement;
    try {
      replacement = packageFiles[resolveFromDep(reference)];
    } catch (e) {
      console.debug('Error during URL handling', e); // eslint-disable-line no-console
    }
    // Fall back to the untouched match when nothing is known about this path.
    return replacement ? `${opening}${replacement}${closing}` : wholeMatch;
  };

  return packageFiles[dep].replace(cssPathRegex, rewriteReference);
}

const metadataKeys = [
'title',
'a11yTitle',
Expand Down Expand Up @@ -580,9 +553,7 @@ export default class H5PRunner {
processCssDependencies() {
const concatenatedCSS = this.sortedDependencies.reduce((wholeCSS, dependency) => {
return (this.cssDependencies[dependency] || []).reduce((allCss, cssDep) => {
const css = replacePaths(cssDep, this.packageFiles[dependency]);
// We have completed the path substitution, so concatenate the CSS.
return `${allCss}${css}\n\n`;
return `${allCss}${this.packageFiles[dependency][cssDep]}\n\n`;
}, wholeCSS);
}, '');
this.cssURL = URL.createObjectURL(new Blob([concatenatedCSS], { type: 'text/css' }));
Expand All @@ -601,7 +572,7 @@ export default class H5PRunner {
this.contentJson = file.toString();
} else {
// Create blob urls for every item in the content folder
this.contentPaths[fileName] = file.toUrl(fileName);
this.contentPaths[fileName] = file.toUrl();
}
}

Expand Down Expand Up @@ -634,7 +605,7 @@ export default class H5PRunner {
this.packageFiles[packagePath][fileName] = file.toString();
} else {
// Otherwise just create a blob URL for this file and store it in our packageFiles maps.
this.packageFiles[packagePath][fileName] = file.toUrl(fileName);
this.packageFiles[packagePath][fileName] = file.toUrl();
}
}

Expand All @@ -647,8 +618,6 @@ export default class H5PRunner {
contentFiles.map(file => this.processContent(file));
}),
...Object.keys(this.packageFiles).map(packagePath => {
// JSZip uses regex for path matching, so we first do regex escaping on the packagePath
// in order to get an exact match, and not accidentally do a regex match based on the path
return this.zip.files(packagePath).then(packageFiles => {
packageFiles.map(file => this.processPackageFile(file, packagePath));
});
Expand Down
53 changes: 0 additions & 53 deletions packages/hashi/test/H5P.spec.js

This file was deleted.

46 changes: 46 additions & 0 deletions packages/kolibri-zip/src/fileUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
export function getAbsoluteFilePath(baseFilePath, relativeFilePath) {
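// Resolve relativeFilePath against the location of baseFilePath inside the zip.
// The URL constructor does the path arithmetic for us: baseFilePath is turned into
// a URL on a dummy origin, relativeFilePath is resolved against that URL, and the
// pathname of the result is the absolute path from the root of the zip.
// For example, style/images.css referencing ../images/cool.png gives images/cool.png.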
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize this was pre-existing and it's just moving it, so non-blocking.

This comment is rather hard to wrap my brain around, mostly the
"so that we can concatenate the dependency URL with the URL relative to the dependency" part.
The second "URL" (i.e. "the URL relative to the dependency") is where I get lost.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically, we are using the JS URL utilities to resolve a relative file path (by turning it into a resolution of a relative URL against a dummy base URL).

So if the file that we are currently looking at in the zip file, e.g. style/images.css, references another file with the relative path ../images/cool.png, this will resolve the reference to an absolute file path from the root of the zip file, i.e. images/cool.png.

Copy link
Member

@nucleogenesis nucleogenesis Dec 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit confusing for me as well, but I played with it a little and I think the thing that isn't clear is that the URL constructor automatically resolves the relative path (its first argument) against the base URL (its second argument).

I tried it in the terminal, and it started to make sense as I kept adding ..s to the relative path:

> new URL("../best/friend.css", "https://a.com/hi/there/my/mortal/enemy").pathname
'/hi/there/my/best/friend.css'

> new URL("../../best/friend.css", "https://a.com/hi/there/my/mortal/enemy").pathname
'/hi/there/best/friend.css'

> new URL("../../../best/friend.css", "https://a.com/hi/there/my/mortal/enemy").pathname
'/hi/best/friend.css'

I think that maybe the comment could use some clarification or an example like the one you gave, Richard, re: styles/header/logo.css has a reference to ../../assets/logo.svg so we need to get a pathname assets/logo.svg out of that. Maybe that's more complicated though 😄

// Take substring to remove the leading slash to match the reference file paths
// in packageFiles.
try {
return new URL(relativeFilePath, new URL(baseFilePath, 'http://b.b/')).pathname.substring(1);
} catch (e) {
console.debug('Error during URL handling', e); // eslint-disable-line no-console
}
return null;
}

// Matches every CSS url(...) reference, with or without quotes around the URL.
// Capture groups:
//   1. the opening `url(` plus an optional quote
//   2. the path, excluding any query string
//   3. an optional query string, captured separately so that the path on its own
//      can be used as a lookup key
//   4. the optional closing quote plus `)`
// The query string group must not be allowed to consume `)`. Otherwise an unquoted
// reference such as url(a.png?v=1) swallows all of the CSS that follows it, up to
// the last `)` that precedes a quote character.
const cssPathRegex = /(url\(['"]?)([^?"')]+)?(\?[^'")]+)?(['"]?\))/g;

/**
 * List the paths referenced through url() in a piece of CSS.
 *
 * @param {string} fileContents - The CSS source to scan.
 * @return {string[]} The referenced paths, in order of appearance and exactly as
 *   written in the CSS, without any query string. A path that is referenced more
 *   than once appears more than once.
 */
export function getCSSPaths(fileContents) {
  // The path capture group is optional, so an empty reference such as url() matches
  // with an undefined path. Drop those: there is nothing to resolve for them.
  return Array.from(fileContents.matchAll(cssPathRegex), ([, , path]) => path).filter(Boolean);
}

/**
 * Replace the url() references in a piece of CSS with replacement URLs.
 *
 * @param {string} fileContents - The CSS source to rewrite.
 * @param {Object} packageFiles - Map from a path, exactly as it is written in the
 *   CSS (without any query string), to the URL that should replace it.
 * @return {string} The CSS with every reference that has an entry in packageFiles
 *   replaced. All other references are left exactly as they were.
 */
export function replaceCSSPaths(fileContents, packageFiles) {
  return fileContents.replace(cssPathRegex, function(match, p1, p2, p3, p4) {
    try {
      // Look to see if there is a URL in our packageFiles mapping that has this as
      // the source path. Only own properties count: a reference such as
      // url(constructor) must not pick up a value inherited from Object.prototype.
      const newUrl = Object.prototype.hasOwnProperty.call(packageFiles, p2)
        ? packageFiles[p2]
        : null;
      if (newUrl) {
        // If so, replace the instance with the new URL. Note that the query string
        // (p3) of the original reference is not carried over to the replacement.
        return `${p1}${newUrl}${p4}`;
      }
    } catch (e) {
      console.debug('Error during URL handling', e); // eslint-disable-line no-console
    }
    // Otherwise just return the match so that it is unchanged.
    return match;
  });
}

// How to find and how to rewrite the references that a CSS file makes to other files.
const cssFilePathMapper = { getPaths: getCSSPaths, replacePaths: replaceCSSPaths };

// The mappers that are used when none are supplied, keyed by file extension.
// Each mapper provides getPaths(fileContents) and replacePaths(fileContents, packageFiles).
export const defaultFilePathMappers = { css: cssFilePathMapper };
94 changes: 77 additions & 17 deletions packages/kolibri-zip/src/index.js
Original file line number Diff line number Diff line change
@@ -1,67 +1,127 @@
import { unzip, strFromU8 } from 'fflate';
import { unzip, strFromU8, strToU8 } from 'fflate';
import isPlainObject from 'lodash/isPlainObject';
import loadBinary from './loadBinary';
import mimetypes from './mimetypes.json';
import { getAbsoluteFilePath, defaultFilePathMappers } from './fileUtils';

class File {
class ExtractedFile {
marcellamaki marked this conversation as resolved.
Show resolved Hide resolved
constructor(name, obj) {
this.name = name;
this.obj = obj;
}

get fileNameExt() {
return (this.name.split('.').slice(-1)[0] || '').toLowerCase();
}

get mimeType() {
return mimetypes[this.fileNameExt] || '';
}

toString() {
return strFromU8(this.obj);
}

toUrl(fileName = null) {
fileName = fileName || this.name;
let type = '';
const fileNameExt = fileName.split('.').slice(-1)[0];
if (fileNameExt) {
const ext = fileNameExt.toLowerCase();
type = mimetypes[ext];
}
const blob = new Blob([this.obj.buffer], { type });
toUrl() {
const blob = new Blob([this.obj.buffer], { type: this.mimeType });
return URL.createObjectURL(blob);
}
}

export default class ZipFile {
constructor(url) {
constructor(url, { filePathMappers } = { filePathMappers: defaultFilePathMappers }) {
this._loadingError = null;
this._extractedFileCache = {};
this._fileLoadingPromise = loadBinary(url)
.then(data => {
this.zipData = new Uint8Array(data);
})
.catch(err => {
this._loadingError = err;
});
this.filePathMappers = isPlainObject(filePathMappers) ? filePathMappers : {};
}

_getFiles(filter) {
if (this._loadingError) {
return Promise.reject(this._loadingError);
/*
* @param {ExtractedFile} file - The file to carry out replacement of references in
* @param {Object} visitedPaths - A map of paths that have already been visited to prevent a loop
* @return {Promise[ExtractedFile]} - A promise that resolves to the file with references replaced
*/
_replaceFiles(file, visitedPaths) {
const mapper = this.filePathMappers[file.fileNameExt];
if (!mapper) {
return Promise.resolve(file);
}
visitedPaths = { ...visitedPaths };
visitedPaths[file.name] = true;
const fileContents = file.toString();
// Filter out any paths that are in our already visited paths, as that means we are in a
// referential loop where one file has pointed us to another, which is now point us back
// to the source.
// Because we need to modify the file before we generate the URL, we can't resolve this loop.
const paths = mapper
.getPaths(fileContents)
.filter(path => !visitedPaths[getAbsoluteFilePath(file.name, path)]);
const absolutePathsMap = paths.reduce((acc, path) => {
acc[getAbsoluteFilePath(file.name, path)] = path;
return acc;
}, {});
return this._getFiles(file => absolutePathsMap[file.name], visitedPaths).then(
replacementFiles => {
const replacementFileMap = replacementFiles.reduce((acc, replacementFile) => {
acc[absolutePathsMap[replacementFile.name]] = replacementFile.toUrl();
return acc;
}, {});
const newFileContents = mapper.replacePaths(fileContents, replacementFileMap);
file.obj = strToU8(newFileContents);
return file;
}
);
}

_getFiles(filterPredicate, visitedPaths = {}) {
const filter = file => !this._extractedFileCache[file.name] && filterPredicate(file);
return this._fileLoadingPromise.then(() => {
return new Promise((resolve, reject) => {
unzip(this.zipData, { filter }, (err, unzipped) => {
if (err) {
reject(err);
return;
}
if (!unzipped) {
const alreadyUnzipped = Object.values(this._extractedFileCache).filter(filterPredicate);
if (!unzipped && !alreadyUnzipped.length) {
reject('No files found');
return;
}
resolve(Object.entries(unzipped).map(([name, obj]) => new File(name, obj)));
Promise.all(
Object.entries(unzipped).map(([name, obj]) => {
const extractedFile = new ExtractedFile(name, obj);
return this._replaceFiles(extractedFile, visitedPaths).then(extractedFile => {
this._extractedFileCache[name] = extractedFile;
return extractedFile;
});
})
).then(extractedFiles => {
resolve(extractedFiles.concat(alreadyUnzipped));
});
});
});
});
}

file(filename) {
if (this._loadingError) {
return Promise.reject(this._loadingError);
}
if (this._extractedFileCache[filename]) {
return Promise.resolve(this._extractedFileCache[filename]);
}
return this._getFiles(file => file.name === filename).then(files => files[0]);
}
files(path) {
if (this._loadingError) {
return Promise.reject(this._loadingError);
}
return this._getFiles(file => file.name.startsWith(path));
}
}
Loading