Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix indexing of non-standard libs including scala-compiler, scala-library etc #1360

Merged
merged 1 commit into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,30 @@ import scaladex.infra.DataPaths
case class NonStandardLib(
groupId: String,
artifactId: String,
lookup: ScalaTargetLookup
lookup: BinaryVersionLookup
)

sealed trait ScalaTargetLookup
sealed trait BinaryVersionLookup

/**
* The version is encoded in the pom file
* dependency on org.scala-lang:scala-library
* ex: io.gatling : gatling-compiler : 2.2.2
*/
case object ScalaTargetFromPom extends ScalaTargetLookup
object BinaryVersionLookup {
/**
* The Scala version is read from the pom file: it is taken from the
* artifact's dependency on org.scala-lang:scala-library
* ex: io.gatling : gatling-compiler : 2.2.2
*/
case object FromDependency extends BinaryVersionLookup

/**
* The project is a plain-java project, thus no ScalaTarget.
* ex: com.typesafe : config : 1.3.1
*/
case object NoScalaTargetPureJavaDependency extends ScalaTargetLookup
/**
* The project is a plain Java project, so it has no Scala binary version.
* ex: com.typesafe : config : 1.3.1
*/
case object Java extends BinaryVersionLookup

/**
* The version is encoded in the version (ex: scala-library itself)
*/
case object ScalaTargetFromVersion extends ScalaTargetLookup
/**
* The Scala version is the artifact's own version (ex: scala-library itself)
*/
case object FromArtifactVersion extends BinaryVersionLookup
}

object NonStandardLib {

Expand All @@ -58,9 +60,9 @@ object NonStandardLib {
case (artifact, rawLookup) =>
val lookup =
rawLookup match {
case "pom" => ScalaTargetFromPom
case "java" => NoScalaTargetPureJavaDependency
case "version" => ScalaTargetFromVersion
case "pom" => BinaryVersionLookup.FromDependency
case "java" => BinaryVersionLookup.Java
case "version" => BinaryVersionLookup.FromArtifactVersion
case _ => sys.error("unknown lookup: '" + rawLookup + "'")
}

Expand Down
2 changes: 1 addition & 1 deletion modules/server/src/main/scala/scaladex/server/Server.scala
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ object Server extends LazyLogging {
val filesystem = FilesystemStorage(config.filesystem)
val publishProcess = PublishProcess(paths, filesystem, webDatabase, config.env)(publishPool, system)
val sonatypeClient = new SonatypeClientImpl()
val sonatypeSynchronizer = new SonatypeService(schedulerDatabase, sonatypeClient, publishProcess)
val sonatypeSynchronizer = new SonatypeService(paths, schedulerDatabase, sonatypeClient, publishProcess)
val adminService =
new AdminService(config.env, schedulerDatabase, searchEngine, githubClient, sonatypeSynchronizer)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,14 @@ class AdminService(
githubClientOpt.map { client =>
val githubUpdater = new GithubUpdater(database, client)
new JobScheduler(Job.githubInfo, githubUpdater.updateAll)
} ++
Option.when(!env.isLocal)(new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing))
} ++ (
if (!env.isLocal) {
Seq(
new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing),
new JobScheduler(Job.nonStandardArtifacts, sonatypeSynchronizer.findNonStandard)
)
} else Seq.empty
)
seq.map(s => s.job.name -> s).toMap
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,12 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
* if the developer follow this convention we extract the relevant parts and we mark
* the library as standard. Otherwise we either have a library like gatling or the scala library itself
*
* @return The artifact name (without suffix), the Scala target, whether this project is a usual Scala library or not
* @return The artifact name (without suffix), the binary version, whether this project is a standard Scala library or not
*/
private def extractMeta(pom: ArtifactModel): Option[ArtifactMeta] = {
val nonStandardLookup =
nonStandardLibs
.find(lib =>
lib.groupId == pom.groupId &&
lib.artifactId == pom.artifactId
nonStandardLibs.find(lib =>
lib.groupId == pom.groupId && lib.artifactId == pom.artifactId
)
.map(_.lookup)

Expand Down Expand Up @@ -118,7 +116,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
}

// For example: io.gatling
case Some(ScalaTargetFromPom) =>
case Some(BinaryVersionLookup.FromDependency) =>
for {
dep <- pom.dependencies.find { dep =>
dep.groupId == "org.scala-lang" &&
Expand All @@ -133,7 +131,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
isNonStandard = true
)
// For example: typesafe config
case Some(NoScalaTargetPureJavaDependency) =>
case Some(BinaryVersionLookup.Java) =>
Some(
ArtifactMeta(
artifactName = pom.artifactId,
Expand All @@ -143,7 +141,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
)

// For example: scala-compiler
case Some(ScalaTargetFromVersion) =>
case Some(BinaryVersionLookup.FromArtifactVersion) =>
for (version <- SemanticVersion.parse(pom.version))
yield ArtifactMeta(
artifactName = pom.artifactId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,65 @@ import scaladex.core.model.Artifact._
import scaladex.core.service.SchedulerDatabase
import scaladex.core.service.SonatypeClient
import scaladex.core.util.ScalaExtensions._
import scaladex.data.cleanup.NonStandardLib
import scaladex.infra.DataPaths

class SonatypeService(
dataPaths: DataPaths,
database: SchedulerDatabase,
sonatypeService: SonatypeClient,
publishProcess: PublishProcess
)(implicit ec: ExecutionContext)
extends LazyLogging {
import SonatypeService._

/**
 * Looks up every library returned by `NonStandardLib.load` and indexes the versions
 * that are not yet among the Maven references known to the database.
 *
 * @return a summary message carrying the total count reported by the per-library runs
 */
def findNonStandard(): Future[String] = {
  val nonStandardLibs = NonStandardLib.load(dataPaths)
  for {
    // Build the lookup set once, instead of once per library inside the loop.
    knownRefs <- database.getAllMavenReferences().map(_.toSet)
    result <- nonStandardLibs.mapSync { lib =>
      val groupId = Artifact.GroupId(lib.groupId)
      // get should not throw: it is a fixed set of artifactIds
      val artifactId = Artifact.ArtifactId.parse(lib.artifactId).get
      findAndIndexMissingArtifacts(groupId, artifactId, knownRefs)
    }
  } yield s"Inserted ${result.sum} missing poms"
}

def findMissing(): Future[String] =
for {
groupIds <- database.getAllGroupIds()
mavenReferenceFromDatabase <- database.getAllMavenReferences().map(_.toSet)
groupIds = mavenReferenceFromDatabase.map(_.groupId).toSeq.sorted.map(Artifact.GroupId)
// we sort just to estimate through the logs the percentage of progress
result <- groupIds.sortBy(_.value).mapSync(g => findAndIndexMissingArtifacts(g, None))
} yield s"Inserted ${result.size} missing poms"
result <- groupIds.mapSync(g => findAndIndexMissingArtifacts(g, None, mavenReferenceFromDatabase))
} yield s"Inserted ${result.sum} missing poms"

def syncOne(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[String] =
for {
result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt)
mavenReferenceFromDatabase <- database.getAllMavenReferences()
result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt, mavenReferenceFromDatabase.toSet)
} yield s"Inserted ${result} poms"

private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[Int] =
private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name], knownRefs: Set[MavenReference]): Future[Int] =
for {
mavenReferenceFromDatabase <- database.getAllMavenReferences()
artifactIds <- sonatypeService.getAllArtifactIds(groupId)
scalaArtifactIds = artifactIds.filter(artifact =>
artifactNameOpt.forall(_ == artifact.name) && artifact.isScala && artifact.binaryVersion.isValid
)
result <- scalaArtifactIds
.mapSync(id => findAndIndexMissingArtifacts(groupId, id, mavenReferenceFromDatabase.toSet))
.mapSync(id => findAndIndexMissingArtifacts(groupId, id, knownRefs))
} yield result.sum

private def findAndIndexMissingArtifacts(
groupId: GroupId,
artifactId: ArtifactId,
mavenReferenceFromDatabase: Set[MavenReference]
knownRefs: Set[MavenReference]
): Future[Int] =
for {
versions <- sonatypeService.getAllVersions(groupId, artifactId)
mavenReferences = versions.map(v =>
MavenReference(groupId = groupId.value, artifactId = artifactId.value, version = v.toString)
)
missingVersions = findMissingVersions(mavenReferenceFromDatabase, mavenReferences)
missingVersions = mavenReferences.filterNot(knownRefs)
_ = if (missingVersions.nonEmpty)
logger.warn(s"${missingVersions.size} artifacts are missing for ${groupId.value}:${artifactId.value}")
missingPomFiles <- missingVersions.map(ref => sonatypeService.getPomFile(ref).map(_.map(ref -> _))).sequence
Expand All @@ -63,10 +79,4 @@ class SonatypeService(
case PublishResult.Success => true
case _ => false
}

}

object SonatypeService {

  /**
   * Keeps the references listed by Sonatype that the database does not contain yet,
   * preserving the order in which Sonatype returned them.
   */
  def findMissingVersions(fromDatabase: Set[MavenReference], fromSonatype: Seq[MavenReference]): Seq[MavenReference] = {
    val alreadyIndexed: MavenReference => Boolean = fromDatabase.contains
    fromSonatype.filter(ref => !alreadyIndexed(ref))
  }
}

This file was deleted.

5 changes: 5 additions & 0 deletions modules/template/src/main/scala/scaladex/view/Job.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ object Job {
"Find missing artifacts in Maven Central of the known group IDs.",
24.hours
)
// Job that looks for non-standard artifacts missing from Maven Central.
// NOTE(review): 2.hours is presumably the run frequency, by analogy with the
// 24.hours of the preceding job — confirm against Job's definition.
val nonStandardArtifacts: Job = Job(
"non-standard-artifacts",
"Find missing non-standard artifacts from Maven Central",
2.hours
)

case class Status(state: State, results: Seq[Result], progress: Option[Progress]) {
def isStarted: Boolean = state.isInstanceOf[Started]
Expand Down
Loading