From 5804f9feead1b865cda83abc5509f0e4aac150f0 Mon Sep 17 00:00:00 2001 From: Adrien Piquerez Date: Tue, 5 Mar 2024 17:13:47 +0100 Subject: [PATCH] Fix indexing of non-standard libs including scala-compiler, scala-library etc --- .../data/cleanup/NonStandardLib.scala | 42 ++++++++++--------- .../main/scala/scaladex/server/Server.scala | 2 +- .../server/service/AdminService.scala | 10 ++++- .../server/service/ArtifactConverter.scala | 14 +++---- .../server/service/SonatypeService.scala | 42 ++++++++++++------- .../service/SonatypeSynchronizerTests.scala | 22 ---------- .../src/main/scala/scaladex/view/Job.scala | 5 +++ 7 files changed, 68 insertions(+), 69 deletions(-) delete mode 100644 modules/server/src/test/scala/scaladex/server/service/SonatypeSynchronizerTests.scala diff --git a/modules/data/src/main/scala/scaladex/data/cleanup/NonStandardLib.scala b/modules/data/src/main/scala/scaladex/data/cleanup/NonStandardLib.scala index 548d925ba..ff24317da 100644 --- a/modules/data/src/main/scala/scaladex/data/cleanup/NonStandardLib.scala +++ b/modules/data/src/main/scala/scaladex/data/cleanup/NonStandardLib.scala @@ -20,28 +20,30 @@ import scaladex.infra.DataPaths case class NonStandardLib( groupId: String, artifactId: String, - lookup: ScalaTargetLookup + lookup: BinaryVersionLookup ) -sealed trait ScalaTargetLookup +sealed trait BinaryVersionLookup -/** - * The version is encoded in the pom file - * dependency on org.scala-lang:scala-library - * ex: io.gatling : gatling-compiler : 2.2.2 - */ -case object ScalaTargetFromPom extends ScalaTargetLookup +object BinaryVersionLookup { + /** + * The version is encoded in the pom file + * dependency on org.scala-lang:scala-library + * ex: io.gatling : gatling-compiler : 2.2.2 + */ + case object FromDependency extends BinaryVersionLookup -/** - * The project is a plain-java project, thus no ScalaTarget. - * ex: com.typesafe : config : 1.3.1 - */ -case object NoScalaTargetPureJavaDependency extends ScalaTargetLookup + /** + * The project is a plain-java project, thus no ScalaTarget. + * ex: com.typesafe : config : 1.3.1 + */ + case object Java extends BinaryVersionLookup -/** - * The version is encoded in the version (ex: scala-library itself) - */ -case object ScalaTargetFromVersion extends ScalaTargetLookup + /** + * The version is encoded in the version (ex: scala-library itself) + */ + case object FromArtifactVersion extends BinaryVersionLookup +} object NonStandardLib { @@ -58,9 +60,9 @@ object NonStandardLib { case (artifact, rawLookup) => val lookup = rawLookup match { - case "pom" => ScalaTargetFromPom - case "java" => NoScalaTargetPureJavaDependency - case "version" => ScalaTargetFromVersion + case "pom" => BinaryVersionLookup.FromDependency + case "java" => BinaryVersionLookup.Java + case "version" => BinaryVersionLookup.FromArtifactVersion case _ => sys.error("unknown lookup: '" + rawLookup + "'") } diff --git a/modules/server/src/main/scala/scaladex/server/Server.scala b/modules/server/src/main/scala/scaladex/server/Server.scala index d7bf80ed7..458afd1ba 100644 --- a/modules/server/src/main/scala/scaladex/server/Server.scala +++ b/modules/server/src/main/scala/scaladex/server/Server.scala @@ -69,7 +69,7 @@ object Server extends LazyLogging { val filesystem = FilesystemStorage(config.filesystem) val publishProcess = PublishProcess(paths, filesystem, webDatabase, config.env)(publishPool, system) val sonatypeClient = new SonatypeClientImpl() - val sonatypeSynchronizer = new SonatypeService(schedulerDatabase, sonatypeClient, publishProcess) + val sonatypeSynchronizer = new SonatypeService(paths, schedulerDatabase, sonatypeClient, publishProcess) val adminService = new AdminService(config.env, schedulerDatabase, searchEngine, githubClient, sonatypeSynchronizer) diff --git a/modules/server/src/main/scala/scaladex/server/service/AdminService.scala b/modules/server/src/main/scala/scaladex/server/service/AdminService.scala index 52d0c04d6..d93f53c22 100644 --- a/modules/server/src/main/scala/scaladex/server/service/AdminService.scala +++ b/modules/server/src/main/scala/scaladex/server/service/AdminService.scala @@ -44,8 +44,14 @@ class AdminService( githubClientOpt.map { client => val githubUpdater = new GithubUpdater(database, client) new JobScheduler(Job.githubInfo, githubUpdater.updateAll) - } ++ - Option.when(!env.isLocal)(new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing)) + } ++ ( + if (!env.isLocal) { + Seq( + new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing), + new JobScheduler(Job.nonStandardArtifacts, sonatypeSynchronizer.findNonStandard) + ) + } else Seq.empty + ) seq.map(s => s.job.name -> s).toMap } diff --git a/modules/server/src/main/scala/scaladex/server/service/ArtifactConverter.scala b/modules/server/src/main/scala/scaladex/server/service/ArtifactConverter.scala index c62e4250c..1c1911b1b 100644 --- a/modules/server/src/main/scala/scaladex/server/service/ArtifactConverter.scala +++ b/modules/server/src/main/scala/scaladex/server/service/ArtifactConverter.scala @@ -68,14 +68,12 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging { * if the developer follow this convention we extract the relevant parts and we mark * the library as standard. Otherwise we either have a library like gatling or the scala library itself * - * @return The artifact name (without suffix), the Scala target, whether this project is a usual Scala library or not + * @return The artifact name (without suffix), the binary version, whether this project is a standard Scala library or not */ private def extractMeta(pom: ArtifactModel): Option[ArtifactMeta] = { val nonStandardLookup = - nonStandardLibs - .find(lib => - lib.groupId == pom.groupId && - lib.artifactId == pom.artifactId + nonStandardLibs.find(lib => + lib.groupId == pom.groupId && lib.artifactId == pom.artifactId ) .map(_.lookup) @@ -118,7 +116,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging { } // For example: io.gatling - case Some(ScalaTargetFromPom) => + case Some(BinaryVersionLookup.FromDependency) => for { dep <- pom.dependencies.find { dep => dep.groupId == "org.scala-lang" && @@ -133,7 +131,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging { isNonStandard = true ) // For example: typesafe config - case Some(NoScalaTargetPureJavaDependency) => + case Some(BinaryVersionLookup.Java) => Some( ArtifactMeta( artifactName = pom.artifactId, @@ -143,7 +141,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging { ) // For example: scala-compiler - case Some(ScalaTargetFromVersion) => + case Some(BinaryVersionLookup.FromArtifactVersion) => for (version <- SemanticVersion.parse(pom.version)) yield ArtifactMeta( artifactName = pom.artifactId, diff --git a/modules/server/src/main/scala/scaladex/server/service/SonatypeService.scala b/modules/server/src/main/scala/scaladex/server/service/SonatypeService.scala index 1957dac5d..8e823a8ba 100644 --- a/modules/server/src/main/scala/scaladex/server/service/SonatypeService.scala +++ b/modules/server/src/main/scala/scaladex/server/service/SonatypeService.scala @@ -9,49 +9,65 @@ import scaladex.core.model.Artifact._ import scaladex.core.service.SchedulerDatabase import scaladex.core.service.SonatypeClient import scaladex.core.util.ScalaExtensions._ +import scaladex.data.cleanup.NonStandardLib +import scaladex.infra.DataPaths class SonatypeService( + dataPaths: DataPaths, database: SchedulerDatabase, sonatypeService: SonatypeClient, publishProcess: PublishProcess )(implicit ec: ExecutionContext) extends LazyLogging { - import SonatypeService._ + + def findNonStandard(): Future[String] = { + val nonStandardLibs = NonStandardLib.load(dataPaths) + for { + mavenReferenceFromDatabase <- database.getAllMavenReferences() + result <- nonStandardLibs.mapSync { lib => + val groupId = Artifact.GroupId(lib.groupId) + // get should not throw: it is a fixed set of artifactIds + val artifactId = Artifact.ArtifactId.parse(lib.artifactId).get + findAndIndexMissingArtifacts(groupId, artifactId, mavenReferenceFromDatabase.toSet) + } + } yield s"Inserted ${result.sum} missing poms" + } def findMissing(): Future[String] = for { - groupIds <- database.getAllGroupIds() + mavenReferenceFromDatabase <- database.getAllMavenReferences().map(_.toSet) + groupIds = mavenReferenceFromDatabase.map(_.groupId).toSeq.sorted.map(Artifact.GroupId) // we sort just to estimate through the logs the percentage of progress - result <- groupIds.sortBy(_.value).mapSync(g => findAndIndexMissingArtifacts(g, None)) - } yield s"Inserted ${result.size} missing poms" + result <- groupIds.mapSync(g => findAndIndexMissingArtifacts(g, None, mavenReferenceFromDatabase)) + } yield s"Inserted ${result.sum} missing poms" def syncOne(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[String] = for { - result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt) + mavenReferenceFromDatabase <- database.getAllMavenReferences() + result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt, mavenReferenceFromDatabase.toSet) } yield s"Inserted ${result} poms" - private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[Int] = + private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name], knownRefs: Set[MavenReference]): Future[Int] = for { - mavenReferenceFromDatabase <- database.getAllMavenReferences() artifactIds <- sonatypeService.getAllArtifactIds(groupId) scalaArtifactIds = artifactIds.filter(artifact => artifactNameOpt.forall(_ == artifact.name) && artifact.isScala && artifact.binaryVersion.isValid ) result <- scalaArtifactIds - .mapSync(id => findAndIndexMissingArtifacts(groupId, id, mavenReferenceFromDatabase.toSet)) + .mapSync(id => findAndIndexMissingArtifacts(groupId, id, knownRefs)) } yield result.sum private def findAndIndexMissingArtifacts( groupId: GroupId, artifactId: ArtifactId, - mavenReferenceFromDatabase: Set[MavenReference] + knownRefs: Set[MavenReference] ): Future[Int] = for { versions <- sonatypeService.getAllVersions(groupId, artifactId) mavenReferences = versions.map(v => MavenReference(groupId = groupId.value, artifactId = artifactId.value, version = v.toString) ) - missingVersions = findMissingVersions(mavenReferenceFromDatabase, mavenReferences) + missingVersions = mavenReferences.filterNot(knownRefs) _ = if (missingVersions.nonEmpty) logger.warn(s"${missingVersions.size} artifacts are missing for ${groupId.value}:${artifactId.value}") missingPomFiles <- missingVersions.map(ref => sonatypeService.getPomFile(ref).map(_.map(ref -> _))).sequence @@ -63,10 +79,4 @@ class SonatypeService( case PublishResult.Success => true case _ => false } - -} - -object SonatypeService { - def findMissingVersions(fromDatabase: Set[MavenReference], fromSonatype: Seq[MavenReference]): Seq[MavenReference] = - fromSonatype.filterNot(fromDatabase) } diff --git a/modules/server/src/test/scala/scaladex/server/service/SonatypeSynchronizerTests.scala b/modules/server/src/test/scala/scaladex/server/service/SonatypeSynchronizerTests.scala deleted file mode 100644 index dc230eea2..000000000 --- a/modules/server/src/test/scala/scaladex/server/service/SonatypeSynchronizerTests.scala +++ /dev/null @@ -1,22 +0,0 @@ -package scaladex.server.service - -import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.should.Matchers -import scaladex.core.model.Artifact.MavenReference - -class SonatypeSynchronizerTests extends AnyFunSpec with Matchers { - describe("SonatypeSynchronizer") { - it("should find missing version from sonatype") { - val groupId = "org" - val artifactId = "test" - val mavenReferenceDatabase = Set(MavenReference(groupId, artifactId, "0.1")) - val mavenReferenceSonatype = - Seq(MavenReference(groupId, artifactId, "0.1"), MavenReference(groupId, artifactId, "0.2")) - - SonatypeService.findMissingVersions(mavenReferenceDatabase, mavenReferenceSonatype) shouldBe Seq( - MavenReference(groupId, artifactId, "0.2") - ) - } - } - -} diff --git a/modules/template/src/main/scala/scaladex/view/Job.scala b/modules/template/src/main/scala/scaladex/view/Job.scala index 20ae3f03f..d035d33c3 100644 --- a/modules/template/src/main/scala/scaladex/view/Job.scala +++ b/modules/template/src/main/scala/scaladex/view/Job.scala @@ -44,6 +44,11 @@ object Job { "Find missing artifacts in Maven Central of the known group IDs.", 24.hours ) + val nonStandardArtifacts: Job = Job( + "non-standard-artifacts", + "Find missing non-standard artifacts from Maven Central", + 2.hours + ) case class Status(state: State, results: Seq[Result], progress: Option[Progress]) { def isStarted: Boolean = state.isInstanceOf[Started]