NanoSocrates/Scripts/UML/CleaningPipeline/cleaning-pipeline.excalidraw.json
2025-09-29 18:53:33 +02:00

634 lines
17 KiB
JSON

{
"type": "excalidraw",
"version": 2,
"source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor",
"elements": [
{
"id": "JNB9z-PeqZ4s8KDfWaoXe",
"type": "rectangle",
"x": 106,
"y": 27,
"width": 653,
"height": 263,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "a2",
"roundness": {
"type": 3
},
"seed": 710740889,
"version": 326,
"versionNonce": 1107631703,
"isDeleted": false,
"boundElements": null,
"updated": 1759156408059,
"link": null,
"locked": false
},
{
"id": "e13wNTgUpn2flMpmMttqx",
"type": "text",
"x": 200.5943407656526,
"y": 44.07937975075269,
"width": 307.2781467269385,
"height": 23.3097531902191,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "a3",
"roundness": null,
"seed": 1012740663,
"version": 444,
"versionNonce": 589551257,
"isDeleted": false,
"boundElements": null,
"updated": 1759156408059,
"link": null,
"locked": false,
"text": "Libs/CleaningPipeline/sql_endpoint",
"fontSize": 18.64780255217528,
"fontFamily": 5,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "Libs/CleaningPipeline/sql_endpoint",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "CgxCElJkKBtIHv-5WQrbo",
"type": "text",
"x": 195,
"y": 80.44259472749451,
"width": 403.64997665852184,
"height": 186.4780255217528,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "a4",
"roundness": null,
"seed": 1261951799,
"version": 507,
"versionNonce": 1922906999,
"isDeleted": false,
"boundElements": null,
"updated": 1759156408059,
"link": null,
"locked": false,
"text": "Class SqlEndpoint:\n - sql_engine\n + movie_ids: list[int]\n\n #\n + get_abbreviated_dataset_by_movie_id\n\n",
"fontSize": 18.64780255217528,
"fontFamily": 5,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "Class SqlEndpoint:\n - sql_engine\n + movie_ids: list[int]\n\n #\n + get_abbreviated_dataset_by_movie_id\n\n",
"autoResize": true,
"lineHeight": 1.25
},
{
"type": "line",
"version": 4979,
"versionNonce": 1473849177,
"isDeleted": false,
"id": "sYReMTdYblr-oJtYYJALU",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -67.14432426259049,
"y": 87.19293561900287,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.09201683999922,
"height": 99.49948667804088,
"seed": 1263944119,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
0.2542098813493443,
75.20117273657175
],
[
0.011896425679918422,
83.76249969444815
],
[
3.970409367559332,
87.46174320643391
],
[
17.75573317066317,
90.59250103325854
],
[
41.05683533152865,
91.56737225214069
],
[
63.319497586673116,
90.01084754868091
],
[
75.14781395923075,
86.28844687220405
],
[
76.81603792670788,
83.15042405259751
],
[
77.05033394391478,
76.25776215104557
],
[
76.86643881413028,
6.3089586511537865
],
[
76.45188016352971,
-0.2999144698665015
],
[
71.50179495549581,
-3.9936571317850627
],
[
61.077971898861186,
-6.132877429442784
],
[
37.32348754161154,
-7.932114425900202
],
[
18.278415656797975,
-6.859225353587373
],
[
3.2995959613238286,
-3.2201165291205287
],
[
-0.04168289608444441,
-0.045185660461322996
],
[
0,
0
]
],
"index": "a6",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2684,
"versionNonce": 952947769,
"isDeleted": false,
"id": "0S6dEWQVqKUVkP6Z5IX1l",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -66.6203948243155,
"y": 144.31921927673278,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 817033943,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a7",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2770,
"versionNonce": 477619481,
"isDeleted": false,
"id": "szGLND7J0nVOvRkNXX9AS",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -67.65225214681931,
"y": 115.35516394150972,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 1704755191,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a8",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 5767,
"versionNonce": 2119031289,
"isDeleted": false,
"id": "O3t2uGktJlDd1_OX_bpV4",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -68.71020112890136,
"y": 80.06066699332126,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 76.59753601865496,
"height": 15.49127539284798,
"seed": 471296279,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "sharp",
"boundElementIds": [
"bxuMGTzXLn7H-uBCptINx"
],
"index": "a9",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 1177,
"versionNonce": 525480665,
"isDeleted": false,
"id": "_SzKlOBOvJgBg7FX0JTTM",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -32.218214023678854,
"y": 104.53733467322485,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 1368927799,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "aA",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 1465,
"versionNonce": 1410887609,
"isDeleted": false,
"id": "oJMl2Kxa3SPaiAY0kxo7A",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -31.867072239745255,
"y": 130.75394896028996,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 1627606871,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "aB",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 1348,
"versionNonce": 314839193,
"isDeleted": false,
"id": "fB6pJBSMA-pRHrpgYKaLL",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 6.239590202363168,
"x": -31.218214023678854,
"y": 159.52267553159635,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 1420643447,
"groupIds": [
"9YkNe1yqnfZy9Z1JX2xr4",
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "aC",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false
},
{
"type": "text",
"version": 846,
"versionNonce": 1091081593,
"isDeleted": false,
"id": "9gZ3Yy1MeP9kEOTLODqLG",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": -76.81018163712321,
"y": 181.11281713043917,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 95.63072204589844,
"height": 23.595161071904883,
"seed": 2019206551,
"groupIds": [
"BDBCTrrhjbJynRAyuf3xJ"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"fontSize": 17.4778970902999,
"fontFamily": 1,
"text": "dataset.db",
"baseline": 16.595161071904883,
"textAlign": "center",
"verticalAlign": "top",
"index": "aD",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1759158252997,
"link": null,
"locked": false,
"containerId": null,
"originalText": "dataset.db",
"autoResize": true,
"lineHeight": 1.350000000000001
},
{
"id": "3eOw20xMhpB5jf_RMG24P",
"type": "text",
"x": 1131.3333333333335,
"y": 31.333333333333428,
"width": 508.3333333333333,
"height": 550,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aE",
"roundness": null,
"seed": 1535658041,
"version": 821,
"versionNonce": 1630266809,
"isDeleted": false,
"boundElements": null,
"updated": 1759157181677,
"link": null,
"locked": false,
"text": "Class PipelineApplier\n - movie_frequence_filter : pd.DataFrame()\n - rel_Frequence_Filter : pd.DataFrame()\n - rel_banned_list: list[str]\n\n + generate_movie_frequency_filter()\n + generate_rel_frequency_filter()\n + generate_list_relationship_filter()\n \n + filter_by_movie_frequency()\n + filter_by_relationship_frequency()\n + delete_relationship_by_list_filter()\n + delete_relationship_by_str()\n\n + drop_na() \n\n + rdf_add_special_token()\n + group_triple_by_movie()\n + build_by_movie()\n # static\n + build_triple()\n + build_incomplete_triple()",
"fontSize": 20,
"fontFamily": 5,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "Class PipelineApplier\n - movie_frequence_filter : pd.DataFrame()\n - rel_Frequence_Filter : pd.DataFrame()\n - rel_banned_list: list[str]\n\n + generate_movie_frequency_filter()\n + generate_rel_frequency_filter()\n + generate_list_relationship_filter()\n \n + filter_by_movie_frequency()\n + filter_by_relationship_frequency()\n + delete_relationship_by_list_filter()\n + delete_relationship_by_str()\n\n + drop_na() \n\n + rdf_add_special_token()\n + group_triple_by_movie()\n + build_by_movie()\n # static\n + build_triple()\n + build_incomplete_triple()",
"autoResize": false,
"lineHeight": 1.25
},
{
"id": "Fbl1gpb5r7QrdRauGUWm2",
"type": "text",
"x": 158.23809523809535,
"y": 502.52380952380935,
"width": 484.2857142857143,
"height": 500,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aF",
"roundness": null,
"seed": 2066618807,
"version": 552,
"versionNonce": 1269344823,
"isDeleted": false,
"boundElements": null,
"updated": 1759158199532,
"link": null,
"locked": false,
"text": "Class Pipeline\n - sql_endpoint: SqlEndpoint()\n\n - task_rdf_mask_file_handler:\n - task_bpe_corpus_file_handler:\n - task_rdf_text_file_handler:\n - task_rdf_completation_file_handler:\n\n - Filter_applier : PipelineApplier()\n\n #\n - get_cleaned_movie_rows()\n \n + execute_task_bpe_corpus()\n + execute_task_rdf_mask()\n + execute_task_rdf_text()\n + execute_task_rdf_completation()\n + execute_all_task()\n\n + use_toy_dataset()",
"fontSize": 20,
"fontFamily": 5,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "Class Pipeline\n - sql_endpoint: SqlEndpoint()\n\n - task_rdf_mask_file_handler:\n - task_bpe_corpus_file_handler:\n - task_rdf_text_file_handler:\n - task_rdf_completation_file_handler:\n\n - Filter_applier : PipelineApplier()\n\n #\n - get_cleaned_movie_rows()\n \n + execute_task_bpe_corpus()\n + execute_task_rdf_mask()\n + execute_task_rdf_text()\n + execute_task_rdf_completation()\n + execute_all_task()\n\n + use_toy_dataset()",
"autoResize": false,
"lineHeight": 1.25
}
],
"appState": {
"gridSize": 20,
"gridStep": 5,
"gridModeEnabled": false,
"viewBackgroundColor": "#ffffff"
},
"files": {}
}