From ed3e9886422ff7ce308803a62c5ab68c3ca1ef3b Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 30 Apr 2024 11:41:59 +0200 Subject: [PATCH] Update vendor/libarchive to libarchive master 83e8b0ea8 #2147 archive_string: clean up strncat_from_utf8_to_utf8 (36047967a) #2153 archive_match: check archive_read_support_format_raw() return value (0ce1b4c38) #2154 archive_match: turn counter into flag (287e05d53) #2155 lha: Do not allow negative file sizes (93b11caed) #2156 tests: setenv LANG to en_US.UTF-8 in bsdunzip test_I.c (83e8b0ea8) Obtained from: libarchive Libarchive commit: 83e8b0ea8c3b07e07ac3dee90a8724565f8e53fd --- libarchive/archive_match.c | 18 ++-- libarchive/archive_read_support_format_lha.c | 2 + libarchive/archive_string.c | 96 +++++++++----------- unzip/test/test_I.c | 8 ++ 4 files changed, 61 insertions(+), 63 deletions(-) diff --git a/libarchive/archive_match.c b/libarchive/archive_match.c index fc8a4ce8127b..3ab8eda36038 100644 --- a/libarchive/archive_match.c +++ b/libarchive/archive_match.c @@ -46,7 +46,7 @@ struct match { struct match *next; - int matches; + int matched; struct archive_mstring pattern; }; @@ -605,7 +605,8 @@ add_pattern_from_file(struct archive_match *a, struct match_list *mlist, return (ARCHIVE_FATAL); } r = archive_read_support_format_raw(ar); - r = archive_read_support_format_empty(ar); + if (r == ARCHIVE_OK) + r = archive_read_support_format_empty(ar); if (r != ARCHIVE_OK) { archive_copy_error(&(a->archive), ar); archive_read_free(ar); @@ -724,12 +725,12 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname) matched = NULL; for (match = a->inclusions.first; match != NULL; match = match->next){ - if (match->matches == 0 && + if (!match->matched && (r = match_path_inclusion(a, match, mbs, pathname)) != 0) { if (r < 0) return (r); a->inclusions.unmatched_count--; - match->matches++; + match->matched = 1; matched = match; } } @@ -752,11 +753,10 @@ path_excluded(struct archive_match *a, int mbs, const void *pathname) for (match = a->inclusions.first; match != NULL; match = match->next){ /* We looked at previously-unmatched inclusions already. */ - if (match->matches > 0 && + if (match->matched && (r = match_path_inclusion(a, match, mbs, pathname)) != 0) { if (r < 0) return (r); - match->matches++; return (0); } } @@ -879,7 +879,7 @@ match_list_unmatched_inclusions_next(struct archive_match *a, for (m = list->unmatched_next; m != NULL; m = m->next) { int r; - if (m->matches) + if (m->matched) continue; if (mbs) { const char *p; @@ -1793,7 +1793,7 @@ match_owner_name_mbs(struct archive_match *a, struct match_list *list, < 0 && errno == ENOMEM) return (error_nomem(a)); if (p != NULL && strcmp(p, name) == 0) { - m->matches++; + m->matched = 1; return (1); } } @@ -1814,7 +1814,7 @@ match_owner_name_wcs(struct archive_match *a, struct match_list *list, < 0 && errno == ENOMEM) return (error_nomem(a)); if (p != NULL && wcscmp(p, name) == 0) { - m->matches++; + m->matched = 1; return (1); } } diff --git a/libarchive/archive_read_support_format_lha.c b/libarchive/archive_read_support_format_lha.c index 4d6290ac33bb..ae5a1d7d668e 100644 --- a/libarchive/archive_read_support_format_lha.c +++ b/libarchive/archive_read_support_format_lha.c @@ -1347,6 +1347,8 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha, lha->compsize = archive_le64dec(extdheader); extdheader += sizeof(uint64_t); lha->origsize = archive_le64dec(extdheader); + if (lha->compsize < 0 || lha->origsize < 0) + goto invalid; } break; case EXT_CODEPAGE: diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index f39677ad7a26..be6c39600d72 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -2640,81 +2640,69 @@ unicode_to_utf16le(char *p, size_t remaining, uint32_t uc) } /* - * Copy UTF-8 string in checking surrogate pair. - * If any surrogate pair are found, it would be canonicalized. + * Append new UTF-8 string to existing UTF-8 string. + * Existing string is assumed to already be in proper form; + * the new string will have invalid sequences replaced and + * surrogate pairs canonicalized. */ static int -strncat_from_utf8_to_utf8(struct archive_string *as, const void *_p, +strncat_from_utf8_to_utf8(struct archive_string *as, const void *_src, size_t len, struct archive_string_conv *sc) { - const char *s; - char *p, *endp; - int n, ret = 0; - + int ret = 0; + const char *src = _src; (void)sc; /* UNUSED */ + /* Pre-extend the destination */ if (archive_string_ensure(as, as->length + len + 1) == NULL) return (-1); - s = (const char *)_p; - p = as->s + as->length; - endp = as->s + as->buffer_length -1; - do { + /* Invariant: src points to the first UTF8 byte that hasn't + * been copied to the destination `as`. */ + for (;;) { + int n; uint32_t uc; - const char *ss = s; - size_t w; + const char *e = src; - /* - * Forward byte sequence until a conversion of that is needed. - */ - while ((n = utf8_to_unicode(&uc, s, len)) > 0) { - s += n; + /* Skip UTF-8 sequences until we reach end-of-string or + * a code point that needs conversion. */ + while ((n = utf8_to_unicode(&uc, e, len)) > 0) { + e += n; len -= n; } - if (ss < s) { - if (p + (s - ss) > endp) { - as->length = p - as->s; - if (archive_string_ensure(as, - as->buffer_length + len + 1) == NULL) - return (-1); - p = as->s + as->length; - endp = as->s + as->buffer_length -1; - } - - memcpy(p, ss, s - ss); - p += s - ss; + /* Copy the part that doesn't need conversion */ + if (e > src) { + if (archive_string_append(as, src, e - src) == NULL) + return (-1); + src = e; } - /* - * If n is negative, current byte sequence needs a replacement. - */ - if (n < 0) { + if (n == 0) { + /* We reached end-of-string */ + return (ret); + } else { + /* Next code point needs conversion */ + char t[4]; + size_t w; + + /* Try decoding a surrogate pair */ if (n == -3 && IS_SURROGATE_PAIR_LA(uc)) { - /* Current byte sequence may be CESU-8. */ - n = cesu8_to_unicode(&uc, s, len); + n = cesu8_to_unicode(&uc, src, len); } + /* Not a (valid) surrogate, so use a replacement char */ if (n < 0) { - ret = -1; - n *= -1;/* Use a replaced unicode character. */ - } - - /* Rebuild UTF-8 byte sequence. */ - while ((w = unicode_to_utf8(p, endp - p, uc)) == 0) { - as->length = p - as->s; - if (archive_string_ensure(as, - as->buffer_length + len + 1) == NULL) - return (-1); - p = as->s + as->length; - endp = as->s + as->buffer_length -1; + ret = -1; /* Return -1 if we used any replacement */ + n *= -1; } - p += w; - s += n; + /* Consume converted code point */ + src += n; len -= n; + /* Convert and append new UTF-8 sequence. */ + w = unicode_to_utf8(t, sizeof(t), uc); + if (archive_string_append(as, t, w) == NULL) + return (-1); } - } while (n > 0); - as->length = p - as->s; - as->s[as->length] = '\0'; - return (ret); + } } static int diff --git a/unzip/test/test_I.c b/unzip/test/test_I.c index 5d31ce8d1611..d189edca1a5c 100644 --- a/unzip/test/test_I.c +++ b/unzip/test/test_I.c @@ -33,6 +33,7 @@ DEFINE_TEST(test_I) { const char *reffile = "test_I.zip"; + const char *lang; int r; #if HAVE_SETLOCALE @@ -44,6 +45,8 @@ DEFINE_TEST(test_I) skipping("setlocale() not available on this system."); #endif + lang = getenv("LANG"); + setenv("LANG", "en_US.UTF-8", 1); extract_reference_file(reffile); r = systemf("%s -I UTF-8 %s >test.out 2>test.err", testprog, reffile); assertEqualInt(0, r); @@ -51,4 +54,9 @@ DEFINE_TEST(test_I) assertEmptyFile("test.err"); assertTextFileContents("Hello, World!\n", "Γειά σου Κόσμε.txt"); + + if (lang == NULL) + unsetenv("LANG"); + else + setenv("LANG", lang, 1); }