Added regex to replace newlines with ", " and remove asterisks (*) from ObjectURI
This commit is contained in:
parent
007f1e9554
commit
64e355e80c
@ -182,3 +182,9 @@ class PipelineApplier():
|
||||
# takes two dataframes as input, one with 2 columns
|
||||
return None
|
||||
|
||||
def regex_on_objects(self, RDF: pd.DataFrame) -> pd.DataFrame:
    """Clean up the text stored in the ``ObjectURI`` column.

    Every run of line terminators (``\\n``, ``\\r\\n`` or a bare ``\\r``) is
    replaced by a single ``", "`` and every asterisk is removed.

    Args:
        RDF: DataFrame that contains an ``ObjectURI`` column.

    Returns:
        The same DataFrame object; its ``ObjectURI`` column is overwritten
        in place with the cleaned values, converted to pandas ``string``
        dtype.

    Raises:
        KeyError: if ``RDF`` has no ``ObjectURI`` column.
    """
    cleaned = RDF["ObjectURI"].astype("string")

    # A character class is used instead of r"\r?\n+" so that consecutive
    # Windows line breaks ("\r\n\r\n") collapse into ONE separator rather
    # than producing ", , ", and so that lone carriage returns are handled.
    cleaned = cleaned.str.replace(r"[\r\n]+", ", ", regex=True)

    # The asterisk is matched literally; no regex engine is needed.
    cleaned = cleaned.str.replace("*", "", regex=False)

    RDF["ObjectURI"] = cleaned
    return RDF
|
||||
@ -92,6 +92,8 @@ class Pipeline():
|
||||
# other filter
|
||||
#
|
||||
RDF = self.filter_applier.delete_relationship_by_list_filter(RDF)
|
||||
# regex on ObjectURI
|
||||
RDF = self.filter_applier.regex_on_objects(RDF)
|
||||
if RDF.empty:
|
||||
continue
|
||||
RDF = self.filter_applier.rdf_add_special_token(RDF) # WARNING, THIS MUST BE DONE AFTER FILTER BY FREQUENCE
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user