Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • coscine/backend/scripts/kpi-generator
1 result
Select Git revision
Loading items
Show changes
Commits on Source (3)
......@@ -7,7 +7,7 @@
<AssemblyName>Coscine.KpiGenerator</AssemblyName>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>0.1.4</Version>
<Version>0.1.5</Version>
</PropertyGroup>
<PropertyGroup>
......
......@@ -27,6 +27,7 @@ public abstract class Reporting<O> where O : class
private bool ReportingEnabled { get; init; }
private string ReportingDatabaseProjectId { get; init; }
private string ReportingBranch { get; init; }
public string RwthRor { get; init; }
public readonly Organization _otherOrganization = new()
......@@ -49,6 +50,8 @@ public abstract class Reporting<O> where O : class
Domain = Configuration.GetStringAndWait("coscine/local/profilesync/domain");
ReportingDatabaseProjectId = Configuration.GetStringAndWait("coscine/local/reporting/gitlab_project_id");
ReportingBranch = Configuration.GetStringAndWait("coscine/local/reporting/branch");
RwthRor = Configuration.GetStringAndWait("coscine/global/organizations/rwth/ror_url");
}
public abstract IEnumerable<ReportingFileObject> GenerateReporting();
......@@ -180,6 +183,25 @@ public abstract class Reporting<O> where O : class
return result;
}
/// <summary>
/// Maps every organization entry to its top-level organization and removes duplicates.
/// </summary>
/// <param name="organizations">Organization entries whose <c>RorUrl</c> should be normalized.</param>
/// <returns>
/// New <see cref="Organization"/> instances carrying the normalized <c>RorUrl</c> and the
/// original <c>Name</c>, distinct by <c>RorUrl</c>.
/// </returns>
public IEnumerable<Organization> GetTopLevelOrganizationsFromEntries(IEnumerable<Organization> organizations)
{
    // Materialize right away so that every entry is converted exactly once and in input order.
    var topLevelEntries = organizations.Select(entry =>
    {
        var rorUrl = entry.RorUrl;
        if (rorUrl.Contains("www.rwth-aachen.de"))
        {
            // e.g. <https://www.rwth-aachen.de/22000> turns into <https://ror.org/04xfq0f34#ORG-42NHW>
            rorUrl = ConvertOldRwthOrganizationToRor(rorUrl);
        }

        // Cut off the fragment, e.g. <https://ror.org/04xfq0f34#ORG-42NHW> turns into <https://ror.org/04xfq0f34>
        var fragmentStart = rorUrl.IndexOf('#');
        if (fragmentStart >= 0)
        {
            rorUrl = rorUrl[..fragmentStart];
        }

        return new Organization
        {
            RorUrl = rorUrl,
            Name = entry.Name
        };
    }).ToList();

    return topLevelEntries.DistinctBy(e => e.RorUrl);
}
public static string SanitizeOrganizationRor(string organizationRor)
{
return HttpUtility.UrlEncode(organizationRor.Replace("https://ror.org/", "").ToLower());
......@@ -227,4 +249,50 @@ public abstract class Reporting<O> where O : class
}
Console.WriteLine();
}
/// <summary>
/// Converts an old RWTH organization URL (e.g. https://www.rwth-aachen.de/22000) to its
/// corresponding RoR (e.g. https://ror.org/04xfq0f34#ORG-42NHW) by querying the RDF store.
/// </summary>
/// <param name="organization">
/// The old organization URL. It is inserted verbatim into the SPARQL query as an IRI.
/// NOTE(review): the value is not escaped; presumably it only ever comes from stored entries — confirm.
/// </param>
/// <returns>
/// The matching unit of <c>RwthRor</c> when the query yields one; otherwise the unchanged
/// <paramref name="organization"/>.
/// </returns>
public string ConvertOldRwthOrganizationToRor(string organization)
{
    var _queryString = new SparqlParameterizedString
    {
        CommandText = $@"
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX org: <http://www.w3.org/ns/org#>
SELECT DISTINCT ?source ?inst
WHERE {{
# Define ?source URI
VALUES ?source
{{
<{organization}>
}} .
# Get Display Name of the institution
?source rdfs:label ?name .
# Get Institute Identifier in ?ikzid
?source org:identifier ?ikzid .
# IKZ ID is in the form 'ikz:<ID>' or 'ikz:0<ID>'
BIND(concat('ikz:', ?ikzid) AS ?ikz) .
BIND(concat('ikz:0', ?ikzid) AS ?ikz0) .
# Fetch all institutes from RWTH
<{RwthRor}> org:hasUnit ?inst .
# OR statement to search by ikz variations and name
{{ ?inst org:identifier ?ikz .}} UNION {{ ?inst org:identifier ?ikz0 .}} UNION {{ ?inst rdfs:label ?name .}}
}}
GROUP BY ?inst
"
    };

    using var results = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(_queryString.ToString());

    // Take the first ?inst binding, if any. Indexing with [0] threw on an empty result set,
    // so an unknown organization aborted the whole report instead of falling back to the input.
    var inst = results.Select(x => x.Value("inst")?.ToString()).FirstOrDefault();
    if (string.IsNullOrWhiteSpace(inst))
    {
        return organization;
    }

    Console.WriteLine($" Organization {organization} found to match {inst}");
    return inst;
}
}
\ No newline at end of file
......@@ -27,7 +27,7 @@ public class CompleteReporting : Reporting<CompleteReportingOptions>
}
catch (Exception e)
{
Console.WriteLine($"!! Skipping ProjectReporting: {e.Message}");
Console.WriteLine($"!! Skipping ProjectReporting: {e.Message} \n");
}
// Resource Reporting
......@@ -40,7 +40,7 @@ public class CompleteReporting : Reporting<CompleteReportingOptions>
}
catch (Exception e)
{
Console.WriteLine($"!! Skipping ResourceReporting: {e.Message}");
Console.WriteLine($"!! Skipping ResourceReporting: {e.Message} \n");
}
// User Reporting
......@@ -53,7 +53,7 @@ public class CompleteReporting : Reporting<CompleteReportingOptions>
}
catch (Exception e)
{
Console.WriteLine($"!! Skipping UserReporting: {e.Message}");
Console.WriteLine($"!! Skipping UserReporting: {e.Message} \n");
}
// Application Profile Reporting
......@@ -66,7 +66,7 @@ public class CompleteReporting : Reporting<CompleteReportingOptions>
}
catch (Exception e)
{
Console.WriteLine($"!! Skipping ApplicationProfileReporting: {e.Message}");
Console.WriteLine($"!! Skipping ApplicationProfileReporting: {e.Message} \n");
}
// System Status Reporting
......@@ -79,7 +79,7 @@ public class CompleteReporting : Reporting<CompleteReportingOptions>
}
catch (Exception e)
{
Console.WriteLine($"!! Skipping SystemReporting: {e.Message}");
Console.WriteLine($"!! Skipping SystemReporting: {e.Message} \n");
}
return result;
......
using Coscine.Database.DataModel;
using AngleSharp.Dom;
using Coscine.Database.DataModel;
using Coscine.Database.Models;
using Coscine.Database.ReturnObjects;
using Coscine.ResourceTypes;
......@@ -75,16 +76,23 @@ public class ProjectReporting : Reporting<ProjectReportingOptions>
private IEnumerable<ReportingFileObject> GeneratePerOrganization(List<ReturnObject> returnObjects)
{
var reportingFilesPerOrganization = new List<ReportingFileObject>();
var organizationsFromProjects = returnObjects.SelectMany(ro => ro.Organizations).DistinctBy(o => o.RorUrl);
var organizationsFromProjects = GetTopLevelOrganizationsFromEntries(returnObjects.SelectMany(ro => ro.Organizations));
foreach (var entry in organizationsFromProjects)
{
var organization = Organizations.Find(o => o.Equals(entry));
var organization = Organizations.Find(o => o.RorUrl.Equals(entry.RorUrl));
if (organization is null)
{
organization = _otherOrganization;
Console.WriteLine($" WARNING!: Organization \"{entry.RorUrl}\" could not be correctly identified. Will use \"{_otherOrganization.RorUrl}\".");
}
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Equals(entry.RorUrl)));
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Contains(entry.RorUrl))).ToList();
// Additional condition to process old RWTH Entries (Could be removed after entries in virtuoso are migrated to their correct RoRs)
if (entry.RorUrl.Equals(RwthRor))
{
returnObjectsForOrganization.AddRange(returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Contains("www.rwth-aachen.de"))).ToList());
}
reportingFilesPerOrganization.Add(new ReportingFileObject
{
Path = GetReportingPathOrganization(organization.RorUrl, ReportingFileName),
......
......@@ -10,7 +10,7 @@ public class ReturnObject
{
public Guid Id { get; set; }
public DateTime? DateCreated { get; set; } = null;
public List<Organization>? Organizations { get; set; } = new();
public List<Organization> Organizations { get; set; } = new();
public List<DisciplineObject> Disciplines { get; set; } = new();
public bool Deleted { get; set; }
public Guid ProjectVisibilityId { get; set; }
......
......@@ -74,16 +74,21 @@ public class ResourceReporting : Reporting<ResourceReportingOptions>
private IEnumerable<ReportingFileObject> GeneratePerOrganization(List<ReturnObject> returnObjects)
{
var reportingFilesPerOrganization = new List<ReportingFileObject>();
var organizationsFromResources = returnObjects.SelectMany(ro => ro.Organizations).DistinctBy(o => o.RorUrl);
var organizationsFromResources = GetTopLevelOrganizationsFromEntries(returnObjects.SelectMany(ro => ro.Organizations));
foreach (var entry in organizationsFromResources)
{
var organization = Organizations.Find(o => o.Equals(entry));
var organization = Organizations.Find(o => o.RorUrl.Equals(entry.RorUrl));
if (organization is null)
{
organization = _otherOrganization;
Console.WriteLine($" WARNING!: Organization \"{entry.RorUrl}\" could not be correctly identified. Will use \"{_otherOrganization.RorUrl}\".");
}
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Equals(entry.RorUrl)));
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Equals(entry.RorUrl))).ToList();
// Additional condition to process old RWTH Entries (Could be removed after entries in virtuoso are migrated to their correct RoRs)
if (entry.RorUrl.Equals(RwthRor))
{
returnObjectsForOrganization.AddRange(returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Contains("www.rwth-aachen.de"))).ToList());
}
reportingFilesPerOrganization.Add(new ReportingFileObject
{
......
using Coscine.Database.ReturnObjects;
using KPIGenerator.Utils;
namespace KPIGenerator.Reportings.User;
......@@ -8,8 +9,8 @@ namespace KPIGenerator.Reportings.User;
public class ReturnObject
{
public List<RelatedProject> RelatedProjects { get; set; } = new();
public List<string> Organizations { get; set; } = new();
public List<string> Institutes { get; set; } = new();
public List<Organization> Organizations { get; set; } = new();
public List<Organization> Institutes { get; set; } = new();
public List<DisciplineObject> Disciplines { get; set; } = new();
public List<ExternalAuthenticatorsObject> LoginProviders { get; set; } = new();
public DateTime? LatestActivity { get; set; } = null;
......
using Coscine.ApiCommons;
using Coscine.Database.DataModel;
using Coscine.Database.Models;
using Coscine.Metadata;
using KPIGenerator.Utils;
using Newtonsoft.Json;
using VDS.RDF.Query;
using static KPIGenerator.Utils.CommandLineOptions;
namespace KPIGenerator.Reportings.User;
public class UserReporting : Reporting<UserReportingOptions>
{
private readonly Authenticator _authenticator;
private readonly ExternalAuthenticatorModel _externalAuthenticatorModel;
private readonly ExternalIdModel _externalIdModel;
private readonly ProjectRoleModel _projectRoleModel;
......@@ -17,11 +17,11 @@ public class UserReporting : Reporting<UserReportingOptions>
private readonly RoleModel _roleModel;
private readonly UserModel _userModel;
private readonly LogModel _logModel;
private readonly IEnumerable<ExternalAuthenticator> _loginProviders;
public UserReporting(UserReportingOptions options) : base(options)
{
ReportingFileName = "users.json";
_authenticator = new Authenticator(null, Configuration);
_externalAuthenticatorModel = new ExternalAuthenticatorModel();
_externalIdModel = new ExternalIdModel();
_projectRoleModel = new ProjectRoleModel();
......@@ -29,6 +29,8 @@ public class UserReporting : Reporting<UserReportingOptions>
_roleModel = new RoleModel();
_userModel = new UserModel();
_logModel = new LogModel();
_loginProviders = _externalAuthenticatorModel.GetAll();
}
public override IEnumerable<ReportingFileObject> GenerateReporting()
......@@ -62,8 +64,8 @@ public class UserReporting : Reporting<UserReportingOptions>
var userReportEntry = new ReturnObject
{
RelatedProjects = GetRelatedProjects(user.Id),
Organizations = GetOrganizations(user.Id, userReturnObject.Organization),
Institutes = GetInstitutes(user.Id, userReturnObject.Institute),
Organizations = GetOrganizations(user.Id, userReturnObject.Organization, "organization"),
Institutes = GetOrganizations(user.Id, userReturnObject.Institute, "institute"),
Disciplines = userReturnObject.Disciplines.ToList(),
LoginProviders = userReturnObject.ExternalAuthenticators.ToList(),
LatestActivity = GetLatestActivity(user.Id)
......@@ -76,16 +78,17 @@ public class UserReporting : Reporting<UserReportingOptions>
private IEnumerable<ReportingFileObject> GeneratePerOrganization(List<ReturnObject> returnObjects)
{
var reportingFilesPerOrganization = new List<ReportingFileObject>();
var organizationsFromUsers = returnObjects.SelectMany(ro => ro.Organizations);
var organizationsFromUsers = GetTopLevelOrganizationsFromEntries(returnObjects.SelectMany(ro => ro.Organizations));
foreach (var entry in organizationsFromUsers)
{
var organization = Organizations.Find(o => o.Name.Equals(entry));
var organization = Organizations.Find(o => o.RorUrl.Equals(entry.RorUrl));
if (organization is null)
{
organization = Organizations.Find(o => o.Name.Equals("Other"));
Console.WriteLine($" WARNING!: Organization \"{entry}\" could not be correctly identified. Will use \"{organization!.Name}\".");
organization = _otherOrganization;
Console.WriteLine($" WARNING!: Organization \"{entry.RorUrl}\" could not be correctly identified. Will use \"{_otherOrganization.RorUrl}\".");
}
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Contains(entry));
var returnObjectsForOrganization = returnObjects.Where(ro => ro.Organizations.Select(o => o.RorUrl).Any(e => e.Contains(entry.RorUrl))).ToList();
reportingFilesPerOrganization.Add(new ReportingFileObject
{
......@@ -118,63 +121,153 @@ public class UserReporting : Reporting<UserReportingOptions>
return result;
}
private List<string> GetOrganizations(Guid id, string organization)
/// <summary>
/// A method that fetches the organization affiliation of a user based on a user ID
/// </summary>
/// <param name="id">Coscine User ID</param>
/// <param name="organizationLabel">Organization of the user. Will be set ONLY for ORCiD users, otherwise null.</param>
/// <returns>List of Organizations that the user belongs to. Will be empty if no organization can be found.</returns>
private List<Organization> GetOrganizations(Guid id, string organizationLabel, string searchedEntityType)
{
var externalIdModel = new ExternalIdModel();
var result = new List<string>();
/* A user login has the following possibilities:
* - ORCiD only login (Organization set by the user and is found inside the SQL DB; can't confirm validity)
* - Shibboleth only login (Organization set by Shibboleth provider and is found inside Virtuoso; is regarded as valid)
* - ORCiD AND Shibboleth login (Organization set by Shibboleth provider and is found inside Virtuoso; is regarded as valid)
*
* In the case of ORCiD only login, reflect that inside report's "LoginProviders". Organization can be found only using its Display Name (not very reliable).
* In the case of Shibboleth login, one needs to find the correct organization by using the user's ExternalId and ExternalAuthenticators.
*/
var result = new List<Organization>();
if (!string.IsNullOrWhiteSpace(organization))
var externalIds = _externalIdModel.GetAllWhere((externalId) => externalId.UserId.Equals(id));
foreach (var externalId in externalIds)
{
result.Add(organization);
var loginProvider = _externalAuthenticatorModel.GetWhere(e => e.Id.Equals(externalId.ExternalAuthenticatorId));
if (loginProvider != null)
{
switch (loginProvider.DisplayName.ToLower())
{
case "orcid":
// Find the RoR of the organization based on its "rdfs:label".
var orgOrcid = TryGetOrganizationByLabel(organizationLabel);
if (orgOrcid is null)
{
Console.WriteLine($" No {searchedEntityType} found for user with ID \"{id}\" and login provider {loginProvider.DisplayName}");
continue;
}
var externalIds = externalIdModel.GetAllWhere((externalId) => externalId.UserId.Equals(id));
var externalIdList = new List<string>();
foreach (var externalId in externalIds)
result.Add(orgOrcid);
break;
case "shibboleth":
var orgShibboleth = GetOrganizationByUserExternalId(externalId.ExternalId1);
if (orgShibboleth is null)
{
externalIdList.Add(externalId.ExternalId1);
Console.WriteLine($" No {searchedEntityType} found for user with ID \"{id}\" and login provider {loginProvider.DisplayName}");
continue;
}
result.Add(orgShibboleth);
break;
default:
Console.WriteLine($"!! Could not verify the login provider for user with ID \"{id}\"");
break;
}
}
else
{
Console.WriteLine($"!! Could not verify the login provider for user with ID \"{id}\" and External Authenticator ID \"{externalId.ExternalAuthenticatorId}\"");
}
}
var resultSet = RdfStoreConnector.GetTriples(null, null, null, 1, externalIdList);
var organizationTriples = resultSet.Where(r => !r.Subject.ToString().Contains('#')).Distinct().ToList();
foreach (var triple in organizationTriples)
var orcidLoginProvider = _loginProviders.Single(lp => lp.DisplayName.ToLower().Equals("orcid"));
if (!string.IsNullOrWhiteSpace(organizationLabel) && !externalIds.Any(e => e.ExternalAuthenticatorId.Equals(orcidLoginProvider.Id)))
{
result.Add(triple.Object.ToString());
// Special case, a user has an organization set, but does not have an external authenticator for ORCiD. Should not be possible.
Console.WriteLine($" !! User with ID \"{id}\" has an organization set inside the Database but does not own a login provider {orcidLoginProvider.DisplayName}");
// Find the RoR of the organization based on its "rdfs:label".
var orgOrcid = TryGetOrganizationByLabel(organizationLabel);
if (orgOrcid is null)
{
Console.WriteLine($" No {searchedEntityType} found for user with ID \"{id}\" and login provider {orcidLoginProvider.DisplayName}");
}
result.Add(orgOrcid);
}
return result;
return result.DistinctBy(e => e.RorUrl).ToList();
}
private List<string> GetInstitutes(Guid id, string institute)
private Organization? GetOrganizationByUserExternalId(string userExternalId)
{
var _queryString = new SparqlParameterizedString()
{
var result = new List<string>();
CommandText = $@"
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX org: <http://www.w3.org/ns/org#>
if (!string.IsNullOrWhiteSpace(institute))
SELECT DISTINCT ?ror
WHERE {{
?ror rdfs:label ?name .
{{
SELECT DISTINCT ?ror
WHERE {{
?class ?p ?memberUrl ;
org:organization ?ror .
{{
SELECT DISTINCT ?memberUrl
WHERE {{
?memberUrl ?p ?value .
FILTER( ?value IN ( '{userExternalId}' ))
}}
}}
}}
}}
}}
"
};
using var results = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(_queryString.ToString());
if (results.IsEmpty)
{
result.Add(institute);
return null;
}
var rors = results.Select(x => x.Value("ror").ToString()).ToList(); // Get the value for ?ror
return FetchOrganizationByRor(rors[0]);
}
var externalIds = _externalIdModel.GetAllWhere((externalId) => externalId.UserId.Equals(id));
var externalIdList = new List<string>();
foreach (var externalId in externalIds)
private Organization? TryGetOrganizationByLabel(string rdfsLabel)
{
if (string.IsNullOrWhiteSpace(rdfsLabel))
{
externalIdList.Add(externalId.ExternalId1);
return null;
}
var resultSet = RdfStoreConnector.GetTriples(null, null, null, 1, externalIdList);
var instituteTriples = resultSet.Where(r => r.Subject.ToString().Contains('#')).Distinct().ToList();
var _queryString = new SparqlParameterizedString()
{
CommandText = $@"
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
foreach (var triple in instituteTriples)
SELECT DISTINCT ?ror
WHERE {{
VALUES ?name
{{
'{rdfsLabel}'
}} .
?ror rdfs:label ?name .
FILTER( CONTAINS( STR(?ror), 'ror.org' ) )
}}
"
};
using var results = RdfStoreConnector.QueryEndpoint.QueryWithResultSet(_queryString.ToString());
if (results.IsEmpty)
{
result.Add(triple.Object.ToString());
return null;
}
return result;
var ror = results.Select(x => x.Value("ror").ToString()).ToList()[0]; // Get the value for ?ror
return FetchOrganizationByRor(ror);
}
private DateTime? GetLatestActivity(Guid id)
{
/*
* TODO: Query may take ages to execute and can lead to timeouts. Make sure to create an index!
* CREATE INDEX log_idx_userid_loglevel_servertime ON Coscine.dbo.Log (UserId, LogLevel, ServerTimestamp);
*/
var latestLog = _logModel.GetAllWhere(l => l.LogLevel.Equals("Analytics") && l.UserId.Equals(id)).OrderByDescending(a => a.ServerTimestamp).FirstOrDefault();
if (latestLog is not null)
{
......