In my previous post, I showed how to search for C# repositories on GitHub, how to search for files within a repository, and how to get the content of a file.
We can now use these to ask a very specific question: Which NuGet packages are used within a specific repository? Let’s start by defining a Package class, and a simple IEqualityComparer for it (which will help us remove duplicate packages):
/// <summary>
/// A NuGet package reference, identified by its name and version.
/// </summary>
public class Package
{
    /// <summary>
    /// Composite identifier of the form <c>Packages/{Name}/{Version}</c>,
    /// computed from <see cref="Name"/> and <see cref="Version"/> on every read.
    /// </summary>
    public string Id
    {
        get
        {
            return string.Concat("Packages/", Name, "/", Version);
        }
    }

    /// <summary>The package name (the <c>id</c> attribute in packages.config).</summary>
    public string Name { get; set; }

    /// <summary>The package version string.</summary>
    public string Version { get; set; }
}
/// <summary>
/// Compares <see cref="Package"/> instances by their <see cref="Package.Id"/>,
/// ignoring case, so that duplicates can be removed (e.g. with <c>Distinct</c>).
/// </summary>
public class PackageComparer : IEqualityComparer<Package>
{
    // One comparer drives both Equals and GetHashCode. Hash-based operations such as
    // Distinct only call Equals on items whose hash codes match, so a case-sensitive
    // hash paired with a case-insensitive Equals would let "NUnit" and "nunit"
    // both survive de-duplication.
    private static readonly StringComparer IdComparer = StringComparer.InvariantCultureIgnoreCase;

    #region Implementation of IEqualityComparer<in Package>

    /// <summary>
    /// Determines whether two packages have the same identifier, ignoring case.
    /// </summary>
    /// <param name="x">The first package; may be null.</param>
    /// <param name="y">The second package; may be null.</param>
    /// <returns>
    /// <c>true</c> when both references are the same (including both null) or their
    /// identifiers match case-insensitively; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(Package x, Package y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return IdComparer.Equals(x.Id, y.Id);
    }

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals(Package, Package)"/>:
    /// packages whose identifiers differ only by case produce the same hash.
    /// </summary>
    /// <param name="obj">The package to hash; may be null.</param>
    /// <returns>The case-insensitive hash of the identifier, or 0 for null.</returns>
    public int GetHashCode(Package obj)
    {
        if (ReferenceEquals(obj, null))
        {
            return 0;
        }

        return IdComparer.GetHashCode(obj.Id);
    }

    #endregion
}
Now, we’ll get all the packages.config files within our repository:
// Build a code search for "packages.config", scoped to the repository and
// using the Path qualifier, then run it against the GitHub search API.
var codeSearch = new SearchCodeRequest("packages.config")
{
    Repo = repo.FullName,
    In = new[] { CodeInQualifier.Path }
};
var codeResult = await github.Search.SearchCode(codeSearch);
Now that we have the list of files, let’s retrieve the content of each of them:
// Keep only results whose file name is exactly "packages.config", fetch each
// file's blob by SHA, and decode its Base64 content into raw bytes.
// NOTE: .Result blocks the calling thread until each download completes; the
// query is also lazy, so nothing is fetched until packagesFiles is enumerated.
var packagesFiles = codeResult.Items
    .Where(p => p.Name == "packages.config")
    .Select(packageFile => github.GitDatabase.Blob.Get(repo.Owner.Login, repo.Name, packageFile.Sha).Result)
    .Select(blob => blob.Content)
    .Select(Convert.FromBase64String);
All we need to do now is parse each file and get the list of packages from the resulting XDocument:
/// <summary>
/// Parses the raw bytes of an XML document and yields one <see cref="Package"/>
/// for every <c>package</c> element found in it.
/// </summary>
/// <param name="arg">The XML document content as bytes.</param>
/// <returns>
/// A lazily evaluated sequence of packages; the document is loaded when
/// enumeration starts and the underlying stream is released when it ends.
/// </returns>
private IEnumerable<Package> GetPackages(byte[] arg)
{
    using (var memoryStream = new MemoryStream(arg))
    {
        var xdoc = XDocument.Load(memoryStream);
        foreach (var element in xdoc.Descendants("package"))
        {
            var idAttribute = element.Attribute("id");
            var versionAttribute = element.Attribute("version");

            // Skip entries missing either attribute instead of failing the
            // whole file with a NullReferenceException.
            if (idAttribute == null || versionAttribute == null)
            {
                continue;
            }

            yield return new Package
            {
                Name = idAttribute.Value,
                Version = versionAttribute.Value
            };
        }
    }
}
Here’s the final resulting method, which also includes removal of duplicate packages (remember, since we can have multiple projects within a Repository, we might get the same package several times):
/// <summary>
/// Finds every packages.config file in the given repository and returns the
/// distinct set of packages they reference.
/// </summary>
/// <param name="github">The GitHub client used for the search and blob requests.</param>
/// <param name="repository">The repository to inspect.</param>
/// <returns>
/// The packages referenced across all packages.config files, with duplicates
/// (same identifier, ignoring case) removed.
/// </returns>
private async Task<IEnumerable<Package>> GetRepoPackage(GitHubClient github, Repository repository)
{
    var codeSearch = new SearchCodeRequest("packages.config")
    {
        Repo = repository.FullName,
        In = new[] { CodeInQualifier.Path }
    };
    var codeResult = await github.Search.SearchCode(codeSearch);

    var packages = new List<Package>();

    // The search matches on path, so filter down to files whose name is exactly packages.config.
    foreach (var packageFile in codeResult.Items.Where(p => p.Name == "packages.config"))
    {
        // Await each download instead of blocking on .Result inside an async method.
        var blob = await github.GitDatabase.Blob.Get(repository.Owner.Login, repository.Name, packageFile.Sha);
        var content = Convert.FromBase64String(blob.Content);
        packages.AddRange(GetPackages(content));
    }

    // Materialize the result so callers enumerating it more than once do not
    // repeat the work; several projects may reference the same package.
    return packages.Distinct(new PackageComparer()).ToList();
}