@@ -73,13 +73,13 @@ class MetaKnowledgeGraph:
7373 error_log = stderr
7474
7575 def __init__ (
76- self ,
77- name = "" ,
78- node_facet_properties : Optional [List ] = None ,
79- edge_facet_properties : Optional [List ] = None ,
80- progress_monitor : Optional [Callable [[GraphEntityType , List ], None ]] = None ,
81- error_log = None ,
82- ** kwargs ,
76+ self ,
77+ name = "" ,
78+ node_facet_properties : Optional [List ] = None ,
79+ edge_facet_properties : Optional [List ] = None ,
80+ progress_monitor : Optional [Callable [[GraphEntityType , List ], None ]] = None ,
81+ error_log = None ,
82+ ** kwargs ,
8383 ):
8484 """
8585 MetaKnowledgeGraph constructor.
@@ -213,8 +213,8 @@ def __init__(self, category_curie: str, mkg):
213213 Biolink Model category CURIE identifier.
214214 """
215215 if not (
216- _category_curie_regexp .fullmatch (category_curie )
217- or category_curie == "unknown"
216+ _category_curie_regexp .fullmatch (category_curie )
217+ or category_curie == "unknown"
218218 ):
219219 raise RuntimeError ("Invalid Biolink category CURIE: " + category_curie )
220220
@@ -280,7 +280,7 @@ def get_count(self) -> int:
280280 return self .category_stats ["count" ]
281281
282282 def get_count_by_source (
283- self , facet : str = "provided_by" , source : str = None
283+ self , facet : str = "provided_by" , source : str = None
284284 ) -> Dict [str , Any ]:
285285 """
286286 Parameters
@@ -470,23 +470,23 @@ def _compile_triple_source_stats(self, triple: Tuple[str, str, str], data: Dict)
470470 )
471471
@staticmethod
def _normalize_relation_field(field) -> set:
    """
    Normalize a KGX edge 'relation' value to a set of terms.

    Parameters
    ----------
    field:
        Either a single string term, or a list, tuple, set or frozenset
        of terms (possibly containing duplicates).

    Returns
    -------
    set
        A new set holding the distinct terms. A lone string is wrapped
        into a one-element set, so callers can always merge the result
        with ``set.update`` regardless of the input shape.

    Raises
    ------
    TypeError
        If ``field`` is neither a string nor one of the supported
        collection types.
    """
    # The string check comes first: a str is itself iterable, and passing
    # it to set() would split the term into individual characters.
    if isinstance(field, str):
        return {field}

    # Collections are copied into a fresh set, which both removes
    # duplicate terms and avoids aliasing the caller's container.
    if isinstance(field, (list, tuple, set, frozenset)):
        return set(field)

    raise TypeError(
        f"Unexpected KGX edge 'relation' data field of type '{type(field)}'"
    )
487487
488488 def _process_triple (
489- self , subject_category : str , predicate : str , object_category : str , data : Dict
489+ self , subject_category : str , predicate : str , object_category : str , data : Dict
490490 ):
491491 # Process the 'valid' S-P-O triple here...
492492 triple = (subject_category , predicate , object_category )
@@ -504,9 +504,9 @@ def _process_triple(
504504 # in which the relation field ends up being a list of terms, sometimes duplicated
505505
506506 if "relation" in data :
507- data ["relation" ] = self . _normalize_and_hash_field ( "relation" , data [ "relation" ])
508- if data ["relation" ] not in self .association_map [ triple ][ "relations" ]:
509- self .association_map [triple ]["relations" ].add (data ["relation" ])
507+ # input data["relation"] is normalized to a Set here
508+ data ["relation" ] = self ._normalize_relation_field ( data [ "relation" ])
509+ self .association_map [triple ]["relations" ].update (data ["relation" ])
510510
511511 self .association_map [triple ]["count" ] += 1
512512
@@ -563,7 +563,6 @@ def analyse_edge(self, u, v, k, data) -> None:
563563 return
564564
565565 for obj_cat_idx in self .node_catalog [v ]:
566-
567566 object_category : str = self .Category .get_category_curie_from_index (
568567 obj_cat_idx
569568 )
@@ -751,12 +750,12 @@ def get_total_edge_counts_across_mappings(self) -> int:
751750 return count
752751
753752 def get_edge_count_by_source (
754- self ,
755- subject_category : str ,
756- predicate : str ,
757- object_category : str ,
758- facet : str = "knowledge_source" ,
759- source : Optional [str ] = None ,
753+ self ,
754+ subject_category : str ,
755+ predicate : str ,
756+ object_category : str ,
757+ facet : str = "knowledge_source" ,
758+ source : Optional [str ] = None ,
760759 ) -> Dict [str , Any ]:
761760 """
762761 Returns count by source for one S-P-O triple (S, O being Biolink categories; P, a Biolink predicate)
@@ -769,8 +768,8 @@ def get_edge_count_by_source(
769768 return dict ()
770769 triple = (subject_category , predicate , object_category )
771770 if (
772- triple in self .association_map
773- and "count_by_source" in self .association_map [triple ]
771+ triple in self .association_map
772+ and "count_by_source" in self .association_map [triple ]
774773 ):
775774 if facet in self .association_map [triple ]["count_by_source" ]:
776775 if source :
0 commit comments