Skip to content

Commit c0f8411

Browse files
author
Richard Bruskiewich
committed
Final iteration on 'relation' field capture/normalization + some file reformatting (PyCharm induced...)
1 parent f2a1722 commit c0f8411

File tree

1 file changed

+33 additions, -34 deletions

kgx/graph_operations/meta_knowledge_graph.py

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,13 @@ class MetaKnowledgeGraph:
7373
error_log = stderr
7474

7575
def __init__(
76-
self,
77-
name="",
78-
node_facet_properties: Optional[List] = None,
79-
edge_facet_properties: Optional[List] = None,
80-
progress_monitor: Optional[Callable[[GraphEntityType, List], None]] = None,
81-
error_log=None,
82-
**kwargs,
76+
self,
77+
name="",
78+
node_facet_properties: Optional[List] = None,
79+
edge_facet_properties: Optional[List] = None,
80+
progress_monitor: Optional[Callable[[GraphEntityType, List], None]] = None,
81+
error_log=None,
82+
**kwargs,
8383
):
8484
"""
8585
MetaKnowledgeGraph constructor.
@@ -213,8 +213,8 @@ def __init__(self, category_curie: str, mkg):
213213
Biolink Model category CURIE identifier.
214214
"""
215215
if not (
216-
_category_curie_regexp.fullmatch(category_curie)
217-
or category_curie == "unknown"
216+
_category_curie_regexp.fullmatch(category_curie)
217+
or category_curie == "unknown"
218218
):
219219
raise RuntimeError("Invalid Biolink category CURIE: " + category_curie)
220220

@@ -280,7 +280,7 @@ def get_count(self) -> int:
280280
return self.category_stats["count"]
281281

282282
def get_count_by_source(
283-
self, facet: str = "provided_by", source: str = None
283+
self, facet: str = "provided_by", source: str = None
284284
) -> Dict[str, Any]:
285285
"""
286286
Parameters
@@ -470,23 +470,23 @@ def _compile_triple_source_stats(self, triple: Tuple[str, str, str], data: Dict)
470470
)
471471

472472
@staticmethod
473-
def _normalize_and_hash_field(name, field) -> Union[str, Tuple]:
474-
if isinstance(field, List) or isinstance(field, Tuple):
473+
def _normalize_relation_field(field) -> Set:
474+
# various non-string iterables...
475+
if isinstance(field, List) or \
476+
isinstance(field, Tuple) or \
477+
isinstance(field, Set):
475478
# eliminate duplicate terms
476-
field_set = set(field)
477-
if len(field_set) == 1:
478-
# if only one element left, return as a scalar
479-
return field_set.pop()
480-
else:
481-
# otherwise, make the set of term a hashable immutable
482-
return tuple(field_set)
479+
# and normalize to a set
480+
return set(field)
483481
elif isinstance(field, str):
484-
return field
482+
# for uniformity, we coerce
483+
# to a set of one element
484+
return {field}
485485
else:
486-
raise TypeError(f"Unexpected KGX '{name}' edge data field of type '{type(field)}'")
486+
raise TypeError(f"Unexpected KGX edge 'relation' data field of type '{type(field)}'")
487487

488488
def _process_triple(
489-
self, subject_category: str, predicate: str, object_category: str, data: Dict
489+
self, subject_category: str, predicate: str, object_category: str, data: Dict
490490
):
491491
# Process the 'valid' S-P-O triple here...
492492
triple = (subject_category, predicate, object_category)
@@ -504,9 +504,9 @@ def _process_triple(
504504
# in which the relation field ends up being a list of terms, sometimes duplicated
505505

506506
if "relation" in data:
507-
data["relation"] = self._normalize_and_hash_field("relation", data["relation"])
508-
if data["relation"] not in self.association_map[triple]["relations"]:
509-
self.association_map[triple]["relations"].add(data["relation"])
507+
# input data["relation"] is normalized to a Set here
508+
data["relation"] = self._normalize_relation_field(data["relation"])
509+
self.association_map[triple]["relations"].update(data["relation"])
510510

511511
self.association_map[triple]["count"] += 1
512512

@@ -563,7 +563,6 @@ def analyse_edge(self, u, v, k, data) -> None:
563563
return
564564

565565
for obj_cat_idx in self.node_catalog[v]:
566-
567566
object_category: str = self.Category.get_category_curie_from_index(
568567
obj_cat_idx
569568
)
@@ -751,12 +750,12 @@ def get_total_edge_counts_across_mappings(self) -> int:
751750
return count
752751

753752
def get_edge_count_by_source(
754-
self,
755-
subject_category: str,
756-
predicate: str,
757-
object_category: str,
758-
facet: str = "knowledge_source",
759-
source: Optional[str] = None,
753+
self,
754+
subject_category: str,
755+
predicate: str,
756+
object_category: str,
757+
facet: str = "knowledge_source",
758+
source: Optional[str] = None,
760759
) -> Dict[str, Any]:
761760
"""
762761
Returns count by source for one S-P-O triple (S, O being Biolink categories; P, a Biolink predicate)
@@ -769,8 +768,8 @@ def get_edge_count_by_source(
769768
return dict()
770769
triple = (subject_category, predicate, object_category)
771770
if (
772-
triple in self.association_map
773-
and "count_by_source" in self.association_map[triple]
771+
triple in self.association_map
772+
and "count_by_source" in self.association_map[triple]
774773
):
775774
if facet in self.association_map[triple]["count_by_source"]:
776775
if source:

0 commit comments

Comments
 (0)