Skip to content

Commit

Permalink
Add support for hashing DOSZ parent files (#352)
Browse files Browse the repository at this point in the history
  • Loading branch information
schellingb authored Jul 27, 2024
1 parent 4357a14 commit cea3209
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 26 deletions.
143 changes: 117 additions & 26 deletions src/rhash/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -716,16 +716,25 @@ struct rc_hash_zip_idx
uint8_t* data;
};

/* Describes one DOSZ file in the chain of files currently being hashed.
 * When a DOSZ references a parent DOSZ, the parent is hashed with its own
 * state whose `child` points back at the referencing file, so the chain can
 * be walked to reject recursive parent references. */
struct rc_hash_ms_dos_dosz_state
{
/* path of the DOSZ file this state describes */
const char* path;
/* state of the DOSZ that named this file as its parent; NULL for the file hashing started from */
const struct rc_hash_ms_dos_dosz_state* child;
};

static int rc_hash_zip_idx_sort(const void* a, const void* b)
{
struct rc_hash_zip_idx *A = (struct rc_hash_zip_idx*)a, *B = (struct rc_hash_zip_idx*)b;
size_t len = (A->length < B->length ? A->length : B->length);
return memcmp(A->data, B->data, len);
}

static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
static int rc_hash_ms_dos_parent(md5_state_t* md5, const struct rc_hash_ms_dos_dosz_state *child, const char* parentname, uint32_t parentname_len);
static int rc_hash_ms_dos_dosc(md5_state_t* md5, const struct rc_hash_ms_dos_dosz_state *dosz);

static int rc_hash_zip_file(md5_state_t* md5, void* file_handle, const struct rc_hash_ms_dos_dosz_state* dosz)
{
uint8_t buf[2048], *alloc_buf, *cdir_start, *cdir_max, *cdir, *hashdata, eocdirhdr_size, cdirhdr_size;
uint8_t buf[2048], *alloc_buf, *cdir_start, *cdir_max, *cdir, *hashdata, eocdirhdr_size, cdirhdr_size, nparents;
uint32_t cdir_entry_len;
size_t sizeof_idx, indices_offset, alloc_size;
int64_t i_file, archive_size, ecdh_ofs, total_files, cdir_size, cdir_ofs;
Expand Down Expand Up @@ -789,7 +798,7 @@ static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
rc_file_seek(file_handle, ecdh_ofs - 20, SEEK_SET);
if (rc_file_read(file_handle, buf, 20) == 20 && RC_ZIP_READ_LE32(buf) == 0x07064b50) /* locator signature */
{
/* Found the locator, now read the actual ZIP64 end of central directory header */
/* Found the locator, now read the actual ZIP64 end of central directory header */
int64_t ecdh64_ofs = (int64_t)RC_ZIP_READ_LE64(buf + 0x08);
if (ecdh64_ofs <= (archive_size - 56))
{
Expand Down Expand Up @@ -837,7 +846,7 @@ static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
hashindex = hashindices;

/* Now process the central directory file records */
for (i_file = 0, cdir = cdir_start; i_file < total_files && cdir >= cdir_start && cdir <= cdir_max; i_file++, cdir += cdir_entry_len)
for (i_file = nparents = 0, cdir = cdir_start; i_file < total_files && cdir >= cdir_start && cdir <= cdir_max; i_file++, cdir += cdir_entry_len)
{
const uint8_t *name, *name_end;
uint32_t signature = RC_ZIP_READ_LE32(cdir + 0x00);
Expand Down Expand Up @@ -907,6 +916,27 @@ static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
return rc_hash_error("Encountered invalid entry in ZIP central directory");
}

/* A DOSZ file can contain a special empty <base>.dosz.parent file in its root which means a parent dosz file is used */
if (dosz && decomp_size == 0 && filename_len > 7 && !strncasecmp((const char*)name + filename_len - 7, ".parent", 7) && !memchr(name, '/', filename_len) && !memchr(name, '\\', filename_len))
{
/* A DOSZ file can only have one parent file */
if (nparents++)
{
free(alloc_buf);
return rc_hash_error("Invalid DOSZ file with multiple parents");
}

/* If there is an error with the parent DOSZ, abort now */
if (!rc_hash_ms_dos_parent(md5, dosz, (const char*)name, (filename_len - 7)))
{
free(alloc_buf);
return 0;
}

/* We don't hash this meta file so a user is free to rename it and the parent file */
continue;
}

/* Write the pointer and length of the data we record about this file */
hashindex->data = hashdata;
hashindex->length = filename_len + 1 + 4 + 8;
Expand Down Expand Up @@ -951,6 +981,11 @@ static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
md5_append(md5, hashindices->data, (int)hashindices->length);

free(alloc_buf);

/* If this is a .dosz file, check if an associated .dosc file exists */
if (dosz && !rc_hash_ms_dos_dosc(md5, dosz))
return 0;

return 1;

#undef RC_ZIP_READ_LE16
Expand All @@ -960,45 +995,101 @@ static int rc_hash_zip_file(md5_state_t* md5, void* file_handle)
#undef RC_ZIP_WRITE_LE64
}

static int rc_hash_ms_dos(char hash[33], const char* path)
/* Hashes the parent DOSZ file referenced by a "<name>.parent" entry found in a child DOSZ.
 *
 * md5            - running MD5 state, handed on to rc_hash_zip_file() for the parent
 * child          - state of the DOSZ file that contains the parent reference
 * parentname     - file name of the parent; need not be null-terminated
 * parentname_len - number of bytes of parentname to use
 *
 * The parent is looked up in the same directory as the child. Returns the result
 * of rc_hash_zip_file() for the parent, or the result of rc_hash_error() if the
 * path cannot be built, the parent is already part of the chain, or the parent
 * file cannot be opened.
 */
static int rc_hash_ms_dos_parent(md5_state_t* md5, const struct rc_hash_ms_dos_dosz_state *child, const char* parentname, uint32_t parentname_len)
{
  const char *lastfslash = strrchr(child->path, '/');
  const char *lastbslash = strrchr(child->path, '\\');
  const char *lastslash = lastfslash;
  size_t dir_len;
  char* parent_path;
  struct rc_hash_ms_dos_dosz_state parent;
  const struct rc_hash_ms_dos_dosz_state *check;
  void* parent_handle;
  int parent_res;

  /* Use whichever separator occurs last. The pointers are only compared relationally
   * when both are non-NULL, as ordering a pointer against NULL is undefined in C. */
  if (lastbslash && (!lastfslash || lastbslash > lastfslash))
    lastslash = lastbslash;

  /* Build the path of the parent by combining the directory of the current file with the name */
  dir_len = (lastslash ? (size_t)(lastslash + 1 - child->path) : 0);
  parent_path = (char*)malloc(dir_len + parentname_len + 1);
  if (!parent_path)
    return rc_hash_error("Could not allocate temporary buffer");

  memcpy(parent_path, child->path, dir_len);
  memcpy(parent_path + dir_len, parentname, parentname_len);
  parent_path[dir_len + parentname_len] = '\0';

  /* Make sure there is no recursion where a parent DOSZ is an already seen DOSZ.
   * The walk starts at the referencing file itself so that a DOSZ naming itself
   * as its parent is rejected before it gets opened and parsed a second time. */
  for (check = child; check; check = check->child)
  {
    if (!strcmp(check->path, parent_path))
    {
      free(parent_path);
      return rc_hash_error("Invalid DOSZ file with recursive parents");
    }
  }

  /* Try to open the parent DOSZ file */
  parent_handle = rc_file_open(parent_path);
  if (!parent_handle)
  {
    char message[1024];
    snprintf(message, sizeof(message), "DOSZ parent file '%s' does not exist", parent_path);
    free(parent_path);
    return rc_hash_error(message);
  }

  /* Fully hash the parent DOSZ ahead of the child; parent_path must stay alive
   * until the call returns because the parent state points at it */
  parent.path = parent_path;
  parent.child = child;
  parent_res = rc_hash_zip_file(md5, parent_handle, &parent);
  rc_file_close(parent_handle);
  free(parent_path);
  return parent_res;
}

/* Appends the hash data of the .dosc file that accompanies a .dosz file, if one exists.
 *
 * md5  - running MD5 state, handed on to rc_hash_zip_file() for the DOSC
 * dosz - state of the DOSZ file whose companion should be looked up
 *
 * The companion path is the DOSZ path with its final 'z'/'Z' replaced by 'c'/'C'.
 * Returns 1 when there is nothing to hash (path does not end in z/Z or the DOSC
 * cannot be opened) or when the DOSC was hashed successfully, 0 when hashing the
 * DOSC failed, and the result of rc_hash_error() when the path copy cannot be allocated.
 */
static int rc_hash_ms_dos_dosc(md5_state_t* md5, const struct rc_hash_ms_dos_dosz_state *dosz)
{
  const size_t path_len = strlen(dosz->path);
  char last;
  char *dosc_path;
  void* file_handle;
  int res;

  /* An empty path has no last character; bail out instead of reading path[-1] */
  if (path_len == 0)
    return 1;

  last = dosz->path[path_len - 1];
  if (last != 'z' && last != 'Z')
    return 1;

  dosc_path = strdup(dosz->path);
  if (!dosc_path)
    return rc_hash_error("Could not allocate temporary buffer");

  /* Swap the z to c and use the same capitalization, hash the file if it exists */
  dosc_path[path_len - 1] = (last == 'z' ? 'c' : 'C');
  file_handle = rc_file_open(dosc_path);
  free(dosc_path);

  /* A missing DOSC is not an error */
  if (!file_handle)
    return 1;

  /* Hash the DOSC as a plain zip file (pass NULL as dosz state) */
  res = rc_hash_zip_file(md5, file_handle, NULL);
  rc_file_close(file_handle);

  return res ? 1 : 0;
}

static int rc_hash_ms_dos(char hash[33], const char* path)
{
struct rc_hash_ms_dos_dosz_state dosz;
md5_state_t md5;
int res;

void* file_handle = rc_file_open(path);
if (!file_handle)
return rc_hash_error("Could not open file");

/* hash the main content zip file first */
md5_init(&md5);
dosz.path = path;
dosz.child = NULL;
res = rc_hash_zip_file(&md5, file_handle, &dosz);
rc_file_close(file_handle);

if (!res)
return 0;

return rc_hash_finalize(&md5, hash);
}
Expand Down
39 changes: 39 additions & 0 deletions test/rhash/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -921,3 +921,42 @@ uint8_t* generate_zip64_file(size_t* image_size)
*image_size = sizeof(data_zip64);
return image;
}

/* Builds an in-memory ZIP image for a child DOSZ test file. Its central directory
 * lists two entries: "base.dosz.parent" (sizes recorded as 0) and "CHILD.TXT".
 *
 * image_size - optional; when non-NULL it receives the image size in bytes,
 *              even if the allocation fails
 *
 * Returns a malloc()ed copy of the image that the caller must free(), or NULL
 * if the allocation fails.
 */
uint8_t* generate_child_dosz_file(size_t* image_size)
{
  const uint8_t data_zip[] = {
    /* local file header: base.dosz.parent */
    'P','K',0x03,0x04, /* local file header signature */
    0x0A,0x00, 0x00,0x00, 0x00,0x00, 0x00,0xBC, 0x98,0x21, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, /* info */
    0x10,0x00, 0x00,0x00, 'b','a','s','e','.','d','o','s','z','.','p','a','r','e','n','t', /* file name length; extra length; name data */
    0x73, 0x04, 0x00, /* compressed content */

    /* local file header: CHILD.TXT */
    'P','K',0x03,0x04,
    0x0A,0x00, 0x00,0x00, 0x00,0x00, 0x00,0xBC, 0x98,0x21, 0x29,0x54,0xB3,0x22, 0x05,0x00,0x00,0x00, 0x05,0x00,0x00,0x00, /* info */
    0x09,0x00, 0x00,0x00, 'C','H','I','L','D','.','T','X','T', /* file name length; extra length; name data */
    0x63, 0x68, 0x69, 0x6C, 0x64, /* content */

    /* central directory record: base.dosz.parent */
    'P','K',0x01,0x02, /* central directory file header signature */
    0x3F,0x00, 0x0A,0x00, 0x00,0x00, 0x00,0x00, 0x00,0xBC, 0x98,0x21, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
    0x10,0x00, 0x00,0x00, 0x00,0x00, 0x00,0x00, 0x00,0x00, 0x20,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
    'b','a','s','e','.','d','o','s','z','.','p','a','r','e','n','t',

    /* central directory record: CHILD.TXT */
    'P','K',0x01,0x02, /* central directory file header signature */
    0x3F,0x00, 0x0A,0x00, 0x00,0x00, 0x00,0x00, 0x00,0xBC, 0x98,0x21, 0x29,0x54,0xB3,0x22, 0x05,0x00,0x00,0x00, 0x05,0x00,0x00,0x00,
    0x09,0x00, 0x00,0x00, 0x00,0x00, 0x00,0x00, 0x00,0x00, 0x20,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
    'C','H','I','L','D','.','T','X','T',

    /* end of central directory record */
    'P','K',0x05,0x06, /* end of central directory signature */
    0x00,0x00, 0x00,0x00, /* disk number */
    0x02,0x00, 0x02,0x00, /* number of directory entries on this disk and total */
    0x75,0x00,0x00,0x00, /* size of central directory (bytes) */
    0x5D,0x00,0x00,0x00, /* offset of start of central directory */
    0x00,0x00 /* comment length */
  };
  const size_t total = sizeof(data_zip);
  uint8_t* copy = (uint8_t*)malloc(total);

  /* the size is reported whether or not the allocation succeeded */
  if (image_size)
    *image_size = total;

  if (copy != NULL)
    memcpy(copy, data_zip, total);

  return copy;
}
1 change: 1 addition & 0 deletions test/rhash/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ uint8_t* generate_iso9660_file(uint8_t* image, const char* filename, const uint8

uint8_t* generate_zip_file(size_t* image_size);
uint8_t* generate_zip64_file(size_t* image_size);
uint8_t* generate_child_dosz_file(size_t* image_size);

extern uint8_t test_rom_z64[64];
extern uint8_t test_rom_n64[64];
Expand Down
39 changes: 39 additions & 0 deletions test/rhash/test_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,44 @@ static void test_hash_msdos_dosz_with_dosc()
ASSERT_STR_EQUALS(hash_dosc, expected_dosc_md5);
}

/* Verifies that hashing a child DOSZ pulls in its parent DOSZ, and that the
 * result changes when a .dosc companion exists for the parent and then also
 * for the child. Each of the three configurations is hashed from "child.dosz"
 * and compared against its own expected MD5. */
static void test_hash_msdos_dosz_with_parent()
{
  size_t image_base_size, image_child_size;
  uint8_t* image_base = generate_zip_file(&image_base_size);
  uint8_t* image_child = generate_child_dosz_file(&image_child_size);
  char hash_dosz[33], hash_dosc[33], hash_dosc2[33];
  const char* expected_dosz_md5 = "623c759476b8b5adb46362f8f0b60769";
  const char* expected_dosc_md5 = "ecd9d776cbaad63094829d7b8dbe5959";
  const char* expected_dosc2_md5 = "cb55c123936ad84479032ea6444cb1a1";
  /* declared up front so no declaration follows a statement (C89 compatible) */
  int result_dosz, result_dosc, result_dosc2;

  /* Add base dosz file and child dosz file which will get hashed together */
  mock_file(0, "base.dosz", image_base, image_base_size);
  mock_file(1, "child.dosz", image_child, image_child_size);

  /* test file hash */
  result_dosz = rc_hash_generate_from_file(hash_dosz, RC_CONSOLE_MS_DOS, "child.dosz");

  /* test file hash with base.dosc also existing */
  mock_file(2, "base.dosc", image_base, image_base_size);
  result_dosc = rc_hash_generate_from_file(hash_dosc, RC_CONSOLE_MS_DOS, "child.dosz");

  /* test file hash with child.dosc also existing */
  mock_file(3, "child.dosc", image_base, image_base_size);
  result_dosc2 = rc_hash_generate_from_file(hash_dosc2, RC_CONSOLE_MS_DOS, "child.dosz");

  /* cleanup */
  free(image_base);
  free(image_child);

  /* validation */
  ASSERT_NUM_EQUALS(result_dosz, 1);
  ASSERT_NUM_EQUALS(result_dosc, 1);
  ASSERT_NUM_EQUALS(result_dosc2, 1);
  ASSERT_STR_EQUALS(hash_dosz, expected_dosz_md5);
  ASSERT_STR_EQUALS(hash_dosc, expected_dosc_md5);
  ASSERT_STR_EQUALS(hash_dosc2, expected_dosc2_md5);
}

/* ========================================================================= */

static void test_hash_nes_32k()
Expand Down Expand Up @@ -2309,6 +2347,7 @@ void test_hash(void) {
TEST(test_hash_msdos_dosz);
TEST(test_hash_msdos_dosz_zip64);
TEST(test_hash_msdos_dosz_with_dosc);
TEST(test_hash_msdos_dosz_with_parent);

/* Neo Geo CD */
TEST(test_hash_neogeocd);
Expand Down

0 comments on commit cea3209

Please sign in to comment.