merge_set

Container for a set of files to be merged

class merge_utils.merge_set.MergeChunk(name: str, merge_hash: str, group: int = -1)[source]

Class to keep track of a chunk of files for merging

add(file: MergeFile) None[source]

Add a file to the chunk

chunk() MergeChunk[source]

Create a subset of the chunk with the same metadata

property inputs: list[str]

Get the list of input files

property json: dict

Get the chunk metadata as a JSON-compatible dictionary

property metadata: dict

Get the metadata for the chunk

property name: str

The name of the chunk

property parents: list[str]

Get the list of parent dids

property tier: int

Get the pass number for the chunk

class merge_utils.merge_set.MergeFile(data: dict)[source]

A generic data file with metadata

property did: str

The file DID (namespace:name)

property format

The file format (core.file_format)

get_fields(fields: list) tuple[source]

Get the namespace and specified metadata values from the file

Parameters:

fields – list of metadata fields to extract

Returns:

tuple of values for each field

property name: str

The file name

property namespace: str

The file namespace

class merge_utils.merge_set.MergeSet(files: list[MergeFile] | None = None)[source]

Class to keep track of a set of files for merging

add_file(file: MergeFile | dict) MergeFile[source]

Add a file to the set

Parameters:

file – A MergeFile object or a dictionary with file metadata

Returns:

the added MergeFile object, or None if it was a duplicate

add_files(files: Iterable) dict[source]

Add a collection of files to the set

Parameters:

files – collection of MergeFile objects or dictionaries with file metadata

Returns:

dict of MergeFile objects that were added

check_consistency(final: bool = False) bool[source]

Check if the files in the set have consistent namespaces and metadata fields. If not, log the inconsistencies and move the inconsistent files out of the set.

Parameters:

final – do final check and log even if bad files are allowed

Returns:

True if the files are consistent, False otherwise

check_errors(final: bool = False) bool[source]

Check for errors in the set and log them.

Parameters:

final – print final summary of errors even if bad files are allowed

Returns:

True if unskipped errors were found, False otherwise

check_missing(final: bool = False) bool[source]

Check if any files were missing metadata.

Parameters:

final – print final summary of missing files even if bad files are allowed

Returns:

True if all files have metadata, False otherwise

check_reachability(final: bool = False) bool[source]

Check if the files in the set are reachable and log any unreachable files.

Parameters:

final – print final summary of unreachable files even if bad files are allowed

Returns:

True if all files are reachable, False otherwise

check_uniqueness(final: bool = False) bool[source]

Check if the files in the set are unique and log any duplicate files.

Parameters:

final – print final summary of duplicate files even if bad files are allowed

Returns:

True if all files are unique, False otherwise

check_validity(final: bool = False) bool[source]

Check if the files in the set are valid and log any invalid files.

Parameters:

final – print final summary of invalid files even if bad files are allowed

Returns:

True if all files are valid, False otherwise

property dupes: dict

Return counts of duplicate file DIDs

property files: list[MergeFile]

Return the list of files

group_count() list[int][source]

Group input files by count

group_size() list[int][source]

Group input files by size

groups() Generator[dict, None, None][source]

Split the files into groups for merging

property hash: str

Get a hash from the list of files

set_unreachable(dids: Iterable[str]) None[source]

Mark files as unreachable, e.g. not found in Rucio or not accessible.

Parameters:

dids – list of file DIDs to mark as unreachable

property size: int

Get the total size of the files

merge_utils.merge_set.check_remote_path(path: str, timeout: float = 5) bool[source]

Check if a remote path is accessible via XRootD