@@ -28,6 +28,7 @@ class ConfigOptions(Enum):
2828class TextFilterTypes (Enum ):
2929 REGEX = 'Regex'
3030 POSITIONAL = 'Positional'
31+ COMBI_SEARCH = 'Combi-search'
3132 KEYWORD = 'Keyword_UNSUPPORTED'
3233 SPLIT_KEYWORDS = 'Split-keywords_UNSUPPORTED'
3334
@@ -121,6 +122,15 @@ def request_text_filter(self):
121122 'line' : int (self .ask_open_question ('Please provide the line number in the cluster: ' )),
122123 'occurrence' : int (self .ask_open_question ('Please provide the occurrence number: ' ))}
123124 return positional_text_filter
125+ elif self .config .get (ConfigOptions .TEXT_FILTER_TYPE .value ) == TextFilterTypes .COMBI_SEARCH .value :
126+ combi_search_filters = []
127+ index = 1
128+ while True :
129+ combi_search_filters .append (self .ask_open_question ('Please provide a regex filter for search {0}: ' .format (index )))
130+ index += 1
131+ if self .ask_open_question ('Do you wish to provide a next search parameter [yes/y]: ' ).lower () not in YES_INPUT :
132+ break
133+ return combi_search_filters
124134 else :
125135 raise ValueError ('Unsupported filter type: %s' % self .config .get (ConfigOptions .TEXT_FILTER_TYPE .value ))
126136
@@ -174,6 +184,8 @@ def extract_from_data(self, data, file):
174184 return self .extract_text_from_clusters_by_regex (clusters )
175185 elif self .config .get (ConfigOptions .TEXT_FILTER_TYPE .value ) == TextFilterTypes .POSITIONAL .value :
176186 return self .extract_text_from_clusters_by_position (clusters )
187+ elif self .config .get (ConfigOptions .TEXT_FILTER_TYPE .value ) == TextFilterTypes .COMBI_SEARCH .value :
188+ return self .extract_text_from_clusters_by_combi_search (clusters )
177189 else :
178190 raise ValueError ("Unsupported filter type: %s" % self .config .get (ConfigOptions .TEXT_FILTER_TYPE .value ))
179191
@@ -205,10 +217,11 @@ def get_sliced_clusters(self, clusters, start_keyword='', end_keyword=''):
205217
206218 return (clusters [start_cluster_index :end_cluster_index + 1 ])
207219
def extract_text_from_clusters_by_regex(self, clusters, regex_pattern=None):
    """Collect the regex capture from every cluster that matches.

    Args:
        clusters: Iterable of text clusters (strings) to search.
        regex_pattern: Optional pattern to use instead of the configured
            text filter. A falsy value (``None`` or ``''``) falls back to
            ``self.config.get(ConfigOptions.TEXT_FILTER.value)``.

    Returns:
        A list with one entry per matching cluster, in cluster order: the
        first capture group of the match, or the whole match when the
        pattern defines no capture group. Clusters without a match are
        skipped.
    """
    if not regex_pattern:
        regex_pattern = self.config.get(ConfigOptions.TEXT_FILTER.value)
    # Compile once up front; the same pattern is applied to every cluster.
    pattern = re.compile(regex_pattern)
    # group(1) raises IndexError for a pattern without capture groups, so
    # fall back to the full match (group 0) in that case.
    group_index = 1 if pattern.groups else 0
    matches = (pattern.search(cluster) for cluster in clusters)
    return [match.group(group_index) for match in matches if match is not None]
@@ -224,6 +237,14 @@ def extract_text_from_clusters_by_position(self, clusters):
224237 continue
225238 return filtered_text
226239
def extract_text_from_clusters_by_combi_search(self, clusters):
    """Run the configured regex searches in sequence and extract with the last one.

    Every filter except the last only narrows down the clusters: a cluster
    is kept when the filter matches anywhere in it. The last filter is then
    passed to ``extract_text_from_clusters_by_regex`` to pull the text out
    of the clusters that remain.

    Args:
        clusters: Iterable of text clusters (strings) to search.

    Returns:
        The result of ``extract_text_from_clusters_by_regex`` for the
        remaining clusters and the last filter.

    Raises:
        ValueError: If no search filter is configured.
    """
    search_filters = self.config.get(ConfigOptions.TEXT_FILTER.value)
    if isinstance(search_filters, str):
        # A lone pattern must be treated as one filter, not sliced
        # character by character.
        search_filters = [search_filters]
    if not search_filters:
        # Fail with a clear message instead of an IndexError on [-1].
        raise ValueError('Combi-search requires at least one regex filter')

    for narrowing_filter in search_filters[:-1]:
        pattern = re.compile(narrowing_filter)
        clusters = [cluster for cluster in clusters if pattern.search(cluster)]
    return self.extract_text_from_clusters_by_regex(clusters, search_filters[-1])
247+
227248
228249class Comparer :
229250 def __init__ (self , source , reference ):
0 commit comments