Added regex to replace newlines with ", " and delete * from ObjectURI
This commit is contained in:
parent
007f1e9554
commit
64e355e80c
@ -182,3 +182,9 @@ class PipelineApplier():
|
|||||||
# as input two dataframe, one with 2 column
|
# as input two dataframe, one with 2 column
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def regex_on_objects(self, RDF: pd.DataFrame) -> pd.DataFrame:
    """Normalise the text stored in the ``ObjectURI`` column.

    Every run of line breaks (``\\n`` or ``\\r\\n``) is replaced by a single
    ``", "`` separator and every asterisk is removed. The column is cast to
    pandas' ``string`` dtype first, so missing values stay missing.

    Args:
        RDF: frame that contains an ``ObjectURI`` column. The column is
            overwritten in place.

    Returns:
        The same ``RDF`` object, with ``ObjectURI`` cleaned.
    """
    objects = RDF["ObjectURI"].astype("string")

    # Group the optional "\r" together with "\n" so that consecutive Windows
    # line breaks ("\r\n\r\n") collapse into ONE separator; the previous
    # pattern r"\r?\n+" only collapsed runs of bare "\n" and produced ", , ".
    objects = objects.str.replace(r"(?:\r?\n)+", ", ", regex=True)

    # Plain literal replacement: no regex needed to drop asterisks.
    objects = objects.str.replace("*", "", regex=False)

    RDF["ObjectURI"] = objects
    return RDF
|
||||||
@ -92,6 +92,8 @@ class Pipeline():
|
|||||||
# other filter
|
# other filter
|
||||||
#
|
#
|
||||||
RDF = self.filter_applier.delete_relationship_by_list_filter(RDF)
|
RDF = self.filter_applier.delete_relationship_by_list_filter(RDF)
|
||||||
|
# regex on ObjectURI
|
||||||
|
RDF = self.filter_applier.regex_on_objects(RDF)
|
||||||
if RDF.empty:
|
if RDF.empty:
|
||||||
continue
|
continue
|
||||||
RDF = self.filter_applier.rdf_add_special_token(RDF) # WARNING, THIS MUST BE DONE AFTER FILTER BY FREQUENCE
|
RDF = self.filter_applier.rdf_add_special_token(RDF) # WARNING, THIS MUST BE DONE AFTER FILTER BY FREQUENCE
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user