Skip to content
Snippets Groups Projects
Commit 3a59a828 authored by Jonathan Hartman
Browse files

Merge branch 'release-0.4.4' into 'main'

Release 0.4.4

See merge request fair-ds/ap-4-2-demonstrator/ap-4.2-data-validation-and-quality-assurance-demonstrator!166
parents 45aa6316 9fac0ff4
Branches
Tags
No related merge requests found
......@@ -34,11 +34,22 @@ workflow:
# rules:
# - !reference [{template name}, rules]
# Run this job only if the commit tag is a semantic version
# Run this job only if the commit tag is a semantic version and we are on the main branch,
# which means we are making a release.
# The pattern is anchored and requires at least one digit per component, so only complete
# "MAJOR.MINOR.PATCH" tags match. Unanchored, it would also match pre-release tags such as
# "1.2.3-rc1" and digit-less strings such as "..".
# NOTE(review): GitLab documents CI_COMMIT_BRANCH as unavailable in tag pipelines, so the branch
# check below may never be true while CI_COMMIT_TAG is set - confirm that release jobs run.
.release-tag:
  rules:
    - if: ($CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+$/ && $CI_COMMIT_BRANCH == "main")
# Run this job only if the commit tag is a semantic version without a pre-release.
# The pattern is anchored and requires at least one digit per component; without the anchors
# a pre-release tag such as "1.2.3-rc1" would match as well.
.tag:
  rules:
    - if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+$/
# Run this job only if the commit tag is a semantic version with a pre-release,
# e.g. "1.2.3-rc1" or "1.2.3-beta.2".
# The pattern is anchored and requires a non-empty pre-release part after the hyphen.
.pre-release-tag:
  rules:
    - if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+-[0-9A-Za-z.-]+$/
# Run this job only when there is a merge request event and the title does not start with "Draft:"
.merge_request:
rules:
......
......@@ -28,11 +28,12 @@ build_container_release_latest:
script:
- /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --build-arg CI_JOB_TOKEN=$CI_JOB_TOKEN --build-arg BRANCH=$CI_COMMIT_TAG --destination $CI_REGISTRY/$CI_PROJECT_PATH/main:${CI_COMMIT_TAG} --destination $CI_REGISTRY/$CI_PROJECT_PATH/main:latest
rules:
- !reference [.tag, rules]
- !reference [.release-tag, rules]
# Build the container image with kaniko and push it to $CI_REGISTRY/$CI_PROJECT_PATH/main,
# tagged with both the commit tag and `latest`.
# NOTE(review): the script line is identical to the one in build_container_release_latest, so
# this job also pushes `main:latest`. Since the rules include .pre-release-tag, confirm that
# overwriting `latest` from a pre-release tag is intended.
build_container_release_tag:
extends: .build_container_template
script:
- /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --build-arg CI_JOB_TOKEN=$CI_JOB_TOKEN --build-arg BRANCH=$CI_COMMIT_TAG --destination $CI_REGISTRY/$CI_PROJECT_PATH/main:${CI_COMMIT_TAG} --destination $CI_REGISTRY/$CI_PROJECT_PATH/main:latest
rules:
- !reference [.tag, rules]
- !reference [.release-tag, rules]
- !reference [.pre-release-tag, rules]
......@@ -5,8 +5,12 @@ publish_package:
stage: publish
extends: .poetry-template
script:
# When we trigger this job via a release tag (which will always be a semantic version), use
# the tag as the version number. This is the step I always forget to do manually.
- export VERSION_TAG=${CI_COMMIT_TAG}
- sed -i "s/^version = .*/version = \"${VERSION_TAG}\"/" pyproject.toml
- poetry config repositories.gitlab ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
- poetry build
- poetry publish --repository gitlab -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD}
rules:
- !reference [.tag, rules]
- !reference [.release-tag, rules]
......@@ -33,12 +33,13 @@ pytest:
extends: .pytest-template
rules:
- !reference [.merge_request, rules]
- !reference [.tag, rules]
- !reference [.pre-release-tag, rules]
# Rule definition for all other pytest jobs: matches pipelines where CI_COMMIT_BRANCH is
# "main", plus whatever the referenced tag rule templates match.
.rules-pytest-versions: &rules-pytest-versions
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
- !reference [.tag, rules]
- !reference [.release-tag, rules]
# Define jobs for each supported Python version
pytest:3.10:
......
[tool.poetry]
name = "fair-ds-ap42"
version = "0.4.3"
version = "0.0.0"
description = "Data Validation and Quality Assurance Demonstrator for FAIR-DS Project Section 4.2"
authors = ["Jonathan Hartman <hartman@itc.rwth-aachen.de>"]
readme = "README.md"
......
......@@ -28,9 +28,6 @@ def get_local_directory_sources() -> list[LocalDirectory]:
"""
sources = []
# We always have the current working directory as a source
sources.append(LocalDirectory(location="."))
# Add any other local directories that the user has provided in the settings
sources.append(LocalDirectory(location=settings.directories.data_directory))
sources.append(LocalDirectory(location=settings.directories.schema_directory))
......
......@@ -7,19 +7,21 @@ Provides default values for settings that are not provided in the settings file
from dataclasses import dataclass
from pathlib import Path
from .get_env_or_default import get_env_or_default
@dataclass
class DirectorySettings:
"""Object for storing directory settings"""
root: Path = Path("")
data: Path = Path("data")
schemas: Path = Path("schemas")
data: Path = None
schemas: Path = None
def __post_init__(self):
self.root = Path(self.root)
self.data = self.root / self.data
self.schemas = self.root / self.schemas
self.data = Path(get_env_or_default(self.data, "DATA_DIRECTORY", "data"))
self.schemas = Path(get_env_or_default(self.schemas, "SCHEMAS_DIRECTORY", "schemas"))
@property
def output_directory(self) -> str:
......
......@@ -4,20 +4,29 @@ File pattern settings
Provides default values for settings that are not provided in the settings file
"""
from dataclasses import dataclass, field
from dataclasses import dataclass
from .get_env_or_default import get_env_or_default
@dataclass
class FilePatternSettings:
"""Object for storing file pattern settings"""
data_file_patterns: list[str] = field(
default_factory=lambda: ["*.csv", "*.parquet"]
)
schema_file_patterns: list[str] = field(default_factory=lambda: ["*.json"])
ignore_patterns: list[str] = field(
default_factory=lambda: ["*schemas.json", "*results.json"]
)
data_file_patterns: list[str] = None
schema_file_patterns: list[str] = None
ignore_patterns: list[str] = None
def __post_init__(self):
self.data_file_patterns = get_env_or_default(
self.data_file_patterns, "DATA_FILE_PATTERNS", ["*.csv", "*.parquet"]
)
self.schema_file_patterns = get_env_or_default(
self.schema_file_patterns, "SCHEMA_FILE_PATTERNS", ["*.json"]
)
self.ignore_patterns = get_env_or_default(
self.ignore_patterns, "FILE_IGNORE_PATTERNS", ["*schemas.json", "*results.json"]
)
@property
def all_file_patterns(self) -> list[str]:
......
"""
get_env_or_default.py
We have some settings that we want to be able to set in multiple ways. We want to be able to set
them in a settings file, as an environment variable, or use a default value. This function allows
us to do that. It takes a provided value, an environment variable name, and a default value. If
the provided value is not None, it returns that value. Otherwise, if the environment variable is
set, it returns the value of that variable. If neither is available, it returns the default value.
We additionally check the type of the default value and parse the environment variable accordingly.
This allows us to set lists and dictionaries as environment variables and have them parsed
correctly.
Usage:
```python
get_env_or_default(provided_value, "ENV_VAR_NAME", default_value)
```
"""
import json
import os
import re
import typing
T = typing.TypeVar("T")


def get_env_or_default(provided: T, env_var_name: str, default: T) -> T:
    """
    Get the value from the provided value, environment variable, or default.

    We prioritize the provided value, then the environment variable, and finally the default.
    The environment variable is parsed according to the type of the default value:

    - list: a JSON array of strings (``["*.csv", "*.parquet"]``), a bracketed list
      (``[*.csv, *.parquet]``) or a plain comma-separated list (``*.csv, *.parquet``).
      Surrounding whitespace is stripped and empty items are dropped, so an empty variable
      yields an empty list.
    - dict: a JSON object.
    - anything else: the raw string is returned unchanged.

    Args:
        provided (T): The provided value
        env_var_name (str): The name of the environment variable
        default (T): The default value

    Returns:
        T: The value to use

    Raises:
        json.JSONDecodeError: If the default is a dictionary and the environment variable does
            not contain valid JSON.
    """
    if provided is not None:
        return provided

    raw_value = os.environ.get(env_var_name)
    if raw_value is None:
        return default

    if isinstance(default, list):
        text = raw_value.strip()
        if text.startswith("[") and text.endswith("]"):
            # Prefer a real JSON array so that quoted items do not keep their quote characters.
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, list) and all(isinstance(item, str) for item in parsed):
                return parsed
            # Not a JSON array of strings: treat it as a bracketed, comma-separated list.
            text = text[1:-1]
        stripped_items = (item.strip() for item in text.split(","))
        return [item for item in stripped_items if item]

    # If the default is a dictionary, parse the environment variable as JSON
    if isinstance(default, dict):
        return json.loads(raw_value)

    # Otherwise, return the value as is
    return raw_value
......@@ -25,6 +25,6 @@ class BaseCountPlot(BasePlot):
if len(counts) > 6:
other_col = pd.Series(counts.iloc[5:].sum(), ["Other Values"])
counts = counts.iloc[:5].append(other_col)
counts = pd.concat([counts.iloc[:5], other_col])
return counts
from pathlib import Path
import pytest
from fair_ds_ap42.settings.directory_settings import DirectorySettings
@pytest.fixture
def set_env_vars(monkeypatch):
    """
    Set the directory environment variables for the duration of one test.
    """
    overrides = {
        "DATA_DIRECTORY": "env_data",
        "SCHEMAS_DIRECTORY": "env_schemas",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
def test_default_values(monkeypatch):
    """
    With no provided values and no environment variables, "data" and "schemas" are used.

    The environment variables are removed first so that the result does not depend on the
    environment the test suite happens to run in.
    """
    monkeypatch.delenv("DATA_DIRECTORY", raising=False)
    monkeypatch.delenv("SCHEMAS_DIRECTORY", raising=False)

    settings = DirectorySettings()

    assert settings.root == Path("")
    assert settings.data == Path("data")
    assert settings.schemas == Path("schemas")
def test_environment_variable_values(set_env_vars):
    """
    When nothing is provided, the DATA_DIRECTORY and SCHEMAS_DIRECTORY environment variables
    decide which directories are used.
    """
    from_env = DirectorySettings()

    assert from_env.data == Path("env_data")
    assert from_env.schemas == Path("env_schemas")
def test_provided_values():
    """
    Explicitly provided "data" and "schemas" values are used as given.
    """
    explicit = DirectorySettings(
        data="provided_data",
        schemas="provided_schemas",
    )

    assert explicit.data == Path("provided_data")
    assert explicit.schemas == Path("provided_schemas")
def test_mixed_values(set_env_vars):
    """
    A provided value wins over its environment variable, while a setting that is not provided
    still falls back to the environment variable.
    """
    mixed = DirectorySettings(data="provided_data")

    # "data" was provided explicitly, "schemas" comes from SCHEMAS_DIRECTORY
    assert mixed.data == Path("provided_data")
    assert mixed.schemas == Path("env_schemas")
import pytest
from fair_ds_ap42.settings.file_pattern_settings import FilePatternSettings
@pytest.fixture
def set_env_vars(monkeypatch):
    """
    Set the three file pattern environment variables to bracketed single-item lists for the
    duration of one test.
    """
    overrides = {
        "DATA_FILE_PATTERNS": "[custom_data_pattern]",
        "SCHEMA_FILE_PATTERNS": "[custom_schema_pattern]",
        "FILE_IGNORE_PATTERNS": "[custom_ignore_pattern]",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
@pytest.fixture
def set_env_vars_comma_sep(monkeypatch):
    """
    Set the three file pattern environment variables to unbracketed, comma-separated values
    for the duration of one test.
    """
    overrides = {
        "DATA_FILE_PATTERNS": "pattern1, pattern2",
        "SCHEMA_FILE_PATTERNS": "pattern3, pattern4",
        "FILE_IGNORE_PATTERNS": "pattern5",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
def test_default_values(monkeypatch):
    """
    With no provided values and no environment variables, the built-in data, schema and ignore
    patterns are used.

    The environment variables are removed first so that the result does not depend on the
    environment the test suite happens to run in.
    """
    monkeypatch.delenv("DATA_FILE_PATTERNS", raising=False)
    monkeypatch.delenv("SCHEMA_FILE_PATTERNS", raising=False)
    monkeypatch.delenv("FILE_IGNORE_PATTERNS", raising=False)

    settings = FilePatternSettings()

    assert settings.data_file_patterns == ["*.csv", "*.parquet"]
    assert settings.schema_file_patterns == ["*.json"]
    assert settings.ignore_patterns == ["*schemas.json", "*results.json"]
def test_environment_variable_values(set_env_vars):
    """
    When nothing is provided, the DATA_FILE_PATTERNS, SCHEMA_FILE_PATTERNS and
    FILE_IGNORE_PATTERNS environment variables decide which patterns are used.
    """
    from_env = FilePatternSettings()

    assert from_env.data_file_patterns == ["custom_data_pattern"]
    assert from_env.schema_file_patterns == ["custom_schema_pattern"]
    assert from_env.ignore_patterns == ["custom_ignore_pattern"]
def test_provided_values():
    """
    Explicitly provided file patterns and ignore patterns are used as given.
    """
    provided = {
        "data_file_patterns": ["provided_data"],
        "schema_file_patterns": ["provided_schemas"],
        "ignore_patterns": ["provided_ignore"],
    }

    explicit = FilePatternSettings(**provided)

    assert explicit.data_file_patterns == ["provided_data"]
    assert explicit.schema_file_patterns == ["provided_schemas"]
    assert explicit.ignore_patterns == ["provided_ignore"]
def test_mixed_values(set_env_vars):
    """
    Provided patterns win over their environment variables, while a setting that is not
    provided still falls back to its environment variable.
    """
    mixed = FilePatternSettings(
        data_file_patterns=["provided_data_pattern"],
        ignore_patterns=["provided_ignore_pattern"],
    )

    # Only the schema patterns were left out, so only they come from the environment
    assert mixed.data_file_patterns == ["provided_data_pattern"]
    assert mixed.schema_file_patterns == ["custom_schema_pattern"]
    assert mixed.ignore_patterns == ["provided_ignore_pattern"]
def test_comma_separated_values(set_env_vars_comma_sep):
    """
    If the environment variables hold plain comma-separated values (no surrounding brackets),
    they are split into a list and whitespace around each item is removed.
    """
    settings = FilePatternSettings()

    assert settings.data_file_patterns == ["pattern1", "pattern2"]
    assert settings.schema_file_patterns == ["pattern3", "pattern4"]
    assert settings.ignore_patterns == ["pattern5"]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment