From 4a4c274cc9d7943dadc8d5780b4115fe754e2b13 Mon Sep 17 00:00:00 2001 From: Neal Gompa Date: Jul 21 2023 11:29:21 +0000 Subject: Backport fix to lower memory usage of updateinfo processing (rhbz#2214520) --- diff --git a/0006-Save-memory-in-repo_updateinfoxml-by-not-interleavin.patch b/0006-Save-memory-in-repo_updateinfoxml-by-not-interleavin.patch new file mode 100644 index 0000000..0d0e833 --- /dev/null +++ b/0006-Save-memory-in-repo_updateinfoxml-by-not-interleavin.patch @@ -0,0 +1,92 @@ +From 296f854b7a9d4b83f6ecb1fc0626511132bf53cf Mon Sep 17 00:00:00 2001 +From: Michael Schroeder +Date: Thu, 20 Jul 2023 11:50:00 +0200 +Subject: [PATCH] Save memory in repo_updateinfoxml by not interleaving + repo_addflexarray calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Mixing repo_addflexarray calls will make the code moving the arrays +in the idarraydata all the time as it cannot append to the arrays. +So first collect the array contents of the collection flexarray and +then add it in one go. + +This is based on pull request #533 by Aleš Matěj.
+--- + ext/repo_updateinfoxml.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/ext/repo_updateinfoxml.c b/ext/repo_updateinfoxml.c +index 22f7093d..0725f033 100644 +--- a/ext/repo_updateinfoxml.c ++++ b/ext/repo_updateinfoxml.c +@@ -113,7 +113,7 @@ struct parsedata { + Id pkghandle; + struct solv_xmlparser xmlp; + struct joindata jd; +- Id collhandle; ++ Queue collectionq; + }; + + /* +@@ -289,9 +289,7 @@ startElement(struct solv_xmlparser *xmlp, int state, const char *name, const cha + break; + + case STATE_COLLECTION: +- { +- pd->collhandle = repodata_new_handle(pd->data); +- } ++ queue_empty(&pd->collectionq); + break; + + /* data, module_handle, UPDATE_MODULE_ARCH, arch); + repodata_add_flexarray(pd->data, pd->handle, UPDATE_MODULE, module_handle); +- repodata_add_flexarray(pd->data, pd->collhandle, UPDATE_MODULE, module_handle); ++ queue_push2(&pd->collectionq, UPDATE_MODULE, module_handle); + break; + } + +@@ -436,13 +434,19 @@ endElement(struct solv_xmlparser *xmlp, int state, char *content) + break; + + case STATE_COLLECTION: +- repodata_add_flexarray(pd->data, pd->handle, UPDATE_COLLECTIONLIST, pd->collhandle); +- pd->collhandle = 0; ++ { ++ Id collhandle = repodata_new_handle(pd->data); ++ int i; ++ for (i = 0; i < pd->collectionq.count; i += 2) ++ repodata_add_flexarray(pd->data, collhandle, pd->collectionq.elements[i], pd->collectionq.elements[i + 1]); ++ repodata_add_flexarray(pd->data, pd->handle, UPDATE_COLLECTIONLIST, collhandle); ++ queue_empty(&pd->collectionq); ++ } + break; + + case STATE_PACKAGE: + repodata_add_flexarray(pd->data, pd->handle, UPDATE_COLLECTION, pd->pkghandle); +- repodata_add_flexarray(pd->data, pd->collhandle, UPDATE_COLLECTION, pd->pkghandle); ++ queue_push2(&pd->collectionq, UPDATE_COLLECTION, pd->pkghandle); + pd->pkghandle = 0; + break; + +@@ -499,11 +503,13 @@ repo_add_updateinfoxml(Repo *repo, FILE *fp, int flags) + pd.pool = pool; + pd.repo = repo; + pd.data = data; ++ 
queue_init(&pd.collectionq); + solv_xmlparser_init(&pd.xmlp, stateswitches, &pd, startElement, endElement); + if (solv_xmlparser_parse(&pd.xmlp, fp) != SOLV_XMLPARSER_OK) + pd.ret = pool_error(pool, -1, "repo_updateinfoxml: %s at line %u:%u", pd.xmlp.errstr, pd.xmlp.line, pd.xmlp.column); + solv_xmlparser_free(&pd.xmlp); + join_freemem(&pd.jd); ++ queue_free(&pd.collectionq); + + if (!(flags & REPO_NO_INTERNALIZE)) + repodata_internalize(data); +-- +2.41.0 + diff --git a/libsolv.spec b/libsolv.spec index ca95ebf..c6a4a8f 100644 --- a/libsolv.spec +++ b/libsolv.spec @@ -130,6 +130,7 @@ Python 3 version. 0003-Add-feature-complex_deps-to-ifelse_rec.t-testcase.patch 0004-Allow-to-break-arch-lock-step-on-erase-operations.patch 0005-Only-disable-infarch-rules-on-erase-if-the-package-w.patch +0006-Save-memory-in-repo_updateinfoxml-by-not-interleavin.patch %prep %autosetup -p1