diff --git a/Scripts/UML/CleaningPipeline/cleaning-pipeline.excalidraw.json b/Scripts/UML/CleaningPipeline/cleaning-pipeline.excalidraw.json new file mode 100644 index 0000000..1249185 --- /dev/null +++ b/Scripts/UML/CleaningPipeline/cleaning-pipeline.excalidraw.json @@ -0,0 +1,634 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "JNB9z-PeqZ4s8KDfWaoXe", + "type": "rectangle", + "x": 106, + "y": 27, + "width": 653, + "height": 263, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a2", + "roundness": { + "type": 3 + }, + "seed": 710740889, + "version": 326, + "versionNonce": 1107631703, + "isDeleted": false, + "boundElements": null, + "updated": 1759156408059, + "link": null, + "locked": false + }, + { + "id": "e13wNTgUpn2flMpmMttqx", + "type": "text", + "x": 200.5943407656526, + "y": 44.07937975075269, + "width": 307.2781467269385, + "height": 23.3097531902191, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a3", + "roundness": null, + "seed": 1012740663, + "version": 444, + "versionNonce": 589551257, + "isDeleted": false, + "boundElements": null, + "updated": 1759156408059, + "link": null, + "locked": false, + "text": "Libs/CleaningPipeline/sql_endpoint", + "fontSize": 18.64780255217528, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Libs/CleaningPipeline/sql_endpoint", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "CgxCElJkKBtIHv-5WQrbo", + "type": "text", + "x": 195, + "y": 80.44259472749451, + "width": 403.64997665852184, + "height": 186.4780255217528, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a4", + "roundness": null, + "seed": 1261951799, + "version": 507, + "versionNonce": 1922906999, + "isDeleted": false, + "boundElements": null, + "updated": 1759156408059, + "link": null, + "locked": false, + "text": "Class SqlEndpoint:\n - sql_engine\n + movie_ids: list[int]\n\n #\n + get_abbreviated_dataset_by_movie_id\n\n", + "fontSize": 18.64780255217528, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Class SqlEndpoint:\n - sql_engine\n + movie_ids: list[int]\n\n #\n + get_abbreviated_dataset_by_movie_id\n\n", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "type": "line", + "version": 4978, + "versionNonce": 2079525497, + "isDeleted": false, + "id": "sYReMTdYblr-oJtYYJALU", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -68.05426555317842, + "y": 87.19293561900287, + "strokeColor": "#000000", + "backgroundColor": "#a5d8ff", + "width": 77.09201683999922, + "height": 99.49948667804088, + "seed": 1263944119, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "round", + "boundElementIds": [], + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0.2542098813493443, + 75.20117273657175 + ], + [ + 0.011896425679918422, + 83.76249969444815 + ], + [ + 3.970409367559332, + 87.46174320643391 + ], + [ + 17.75573317066317, + 90.59250103325854 + ], + [ + 41.05683533152865, + 91.56737225214069 + ], + [ + 63.319497586673116, + 90.01084754868091 + ], + [ + 75.14781395923075, + 86.28844687220405 + ], + [ + 76.81603792670788, + 83.15042405259751 + ], + [ + 77.05033394391478, + 76.25776215104557 + ], + [ + 76.86643881413028, + 6.3089586511537865 + ], + [ + 76.45188016352971, + -0.2999144698665015 + ], + [ + 71.50179495549581, + -3.9936571317850627 + ], + [ + 61.077971898861186, + -6.132877429442784 + ], + [ + 37.32348754161154, + -7.932114425900202 + ], + [ + 18.278415656797975, + -6.859225353587373 + ], + [ + 3.2995959613238286, + -3.2201165291205287 + ], + [ + -0.04168289608444441, + -0.045185660461322996 + ], + [ + 0, + 0 + ] + ], + "index": "a6", + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 2683, + "versionNonce": 33379161, + "isDeleted": false, + "id": "0S6dEWQVqKUVkP6Z5IX1l", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -67.53033611490343, + "y": 144.31921927673278, + "strokeColor": "#000000", + "backgroundColor": "#a5d8ff", + "width": 77.17198221193564, + "height": 8.562348957853036, + "seed": 817033943, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "round", + "boundElementIds": [], + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 2.033150371639873, + 3.413095389435587 + ], + [ + 10.801287372573954, + 6.276651055277943 + ], + [ + 22.468666942209353, + 8.010803051612635 + ], + [ + 40.747074201802775, + 8.168828515515864 + ], + [ + 62.077348233027564, + 7.0647721921469495 + ], + [ + 74.53446931782398, + 3.04824021069218 + ], + [ + 77.17198221193564, + -0.3935204423371723 + ] + ], + "index": "a7", + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 2769, + "versionNonce": 1703641145, + "isDeleted": false, + "id": "szGLND7J0nVOvRkNXX9AS", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -68.56219343740725, + "y": 115.35516394150972, + "strokeColor": "#000000", + "backgroundColor": "#a5d8ff", + "width": 77.17198221193564, + "height": 8.562348957853036, + "seed": 1704755191, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "round", + "boundElementIds": [], + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 2.033150371639873, + 3.413095389435587 + ], + [ + 10.801287372573954, + 6.276651055277943 + ], + [ + 22.468666942209353, + 8.010803051612635 + ], + [ + 40.747074201802775, + 8.168828515515864 + ], + [ + 62.077348233027564, + 7.0647721921469495 + ], + [ + 74.53446931782398, + 3.04824021069218 + ], + [ + 77.17198221193564, + -0.3935204423371723 + ] + ], + "index": "a8", + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "ellipse", + "version": 5766, + "versionNonce": 344002841, + "isDeleted": false, + "id": "O3t2uGktJlDd1_OX_bpV4", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -69.6201424194893, + "y": 80.06066699332126, + "strokeColor": "#000000", + "backgroundColor": "#a5d8ff", + "width": 76.59753601865496, + "height": 15.49127539284798, + "seed": 471296279, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "sharp", + "boundElementIds": [ + "bxuMGTzXLn7H-uBCptINx" + ], + "index": "a9", + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "ellipse", + "version": 1176, + "versionNonce": 1951499769, + "isDeleted": false, + "id": "_SzKlOBOvJgBg7FX0JTTM", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -33.12815531426679, + "y": 104.53733467322485, + "strokeColor": "#000000", + "backgroundColor": "#228be6", + "width": 11.226103154161754, + "height": 12.183758484455605, + "seed": 1368927799, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "sharp", + "boundElementIds": [], + "index": "aA", + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "ellipse", + "version": 1464, + "versionNonce": 1879072473, + "isDeleted": false, + "id": "oJMl2Kxa3SPaiAY0kxo7A", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -32.77701353033319, + "y": 130.75394896028996, + "strokeColor": "#000000", + "backgroundColor": "#228be6", + "width": 11.226103154161754, + "height": 12.183758484455605, + "seed": 1627606871, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "sharp", + "boundElementIds": [], + "index": "aB", + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "ellipse", + "version": 1347, + "versionNonce": 1176574905, + "isDeleted": false, + "id": "fB6pJBSMA-pRHrpgYKaLL", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 6.239590202363168, + "x": -32.12815531426679, + "y": 159.52267553159635, + "strokeColor": "#000000", + "backgroundColor": "#228be6", + "width": 11.226103154161754, + "height": 12.183758484455605, + "seed": 1420643447, + "groupIds": [ + "9YkNe1yqnfZy9Z1JX2xr4", + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "sharp", + "boundElementIds": [], + "index": "aC", + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 845, + "versionNonce": 383204505, + "isDeleted": false, + "id": "9gZ3Yy1MeP9kEOTLODqLG", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": -77.72012292771115, + "y": 181.11281713043917, + "strokeColor": "#000000", + "backgroundColor": "#a5d8ff", + "width": 95.63072204589844, + "height": 23.595161071904883, + "seed": 2019206551, + "groupIds": [ + "BDBCTrrhjbJynRAyuf3xJ" + ], + "strokeSharpness": "sharp", + "boundElementIds": [], + "fontSize": 17.4778970902999, + "fontFamily": 1, + "text": "dataset.db", + "baseline": 16.595161071904883, + "textAlign": "center", + "verticalAlign": "top", + "index": "aD", + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1759157176189, + "link": null, + "locked": false, + "containerId": null, + "originalText": "dataset.db", + "autoResize": true, + "lineHeight": 1.350000000000001 + }, + { + "id": "3eOw20xMhpB5jf_RMG24P", + "type": "text", + "x": 1131.3333333333335, + "y": 31.333333333333428, + "width": 508.3333333333333, + "height": 550, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 1535658041, + "version": 821, + "versionNonce": 1630266809, + "isDeleted": false, + "boundElements": null, + "updated": 1759157181677, + "link": null, + "locked": false, + "text": "Class PipelineApplier\n - movie_frequence_filter : pd.DataFrame()\n - rel_Frequence_Filter : pd.DataFrame()\n - rel_banned_list: list[str]\n\n + generate_movie_frequency_filter()\n + generate_rel_frequency_filter()\n + generate_list_relationship_filter()\n \n + filter_by_movie_frequency()\n + filter_by_relationship_frequency()\n + delete_relationship_by_list_filter()\n + delete_relationship_by_str()\n\n + drop_na() \n\n + rdf_add_special_token()\n + group_triple_by_movie()\n + build_by_movie()\n # static\n + build_triple()\n + build_incomplete_triple()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Class PipelineApplier\n - movie_frequence_filter : pd.DataFrame()\n - rel_Frequence_Filter : pd.DataFrame()\n - rel_banned_list: list[str]\n\n + generate_movie_frequency_filter()\n + generate_rel_frequency_filter()\n + generate_list_relationship_filter()\n \n + filter_by_movie_frequency()\n + filter_by_relationship_frequency()\n + delete_relationship_by_list_filter()\n + delete_relationship_by_str()\n\n + drop_na() \n\n + rdf_add_special_token()\n + group_triple_by_movie()\n + build_by_movie()\n # static\n + build_triple()\n + build_incomplete_triple()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "Fbl1gpb5r7QrdRauGUWm2", + "type": "text", + "x": 158.23809523809535, + "y": 502.52380952380935, + "width": 484.2857142857143, + "height": 475, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": null, + "seed": 2066618807, + "version": 541, + "versionNonce": 7392153, + "isDeleted": false, + "boundElements": null, + "updated": 1759157954202, + "link": null, + "locked": false, + "text": "Class Pipeline\n - sql_endpoint: SqlEndpoint()\n\n - task_rdf_mask_file_handler:\n - task_bpe_corpus_file_handler:\n - task_rdf_text_file_handler:\n - task_rdf_completation_file_handler:\n\n - Filter_applier : PipelineApplier()\n\n\n \n + execute_task_bpe_corpus()\n + execute_task_rdf_mask()\n + execute_task_rdf_text()\n + execute_task_rdf_completation()\n + execute_all_task()\n\n + use_toy_dataset()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Class Pipeline\n - sql_endpoint: SqlEndpoint()\n\n - task_rdf_mask_file_handler:\n - task_bpe_corpus_file_handler:\n - task_rdf_text_file_handler:\n - task_rdf_completation_file_handler:\n\n - Filter_applier : PipelineApplier()\n\n\n \n + execute_task_bpe_corpus()\n + execute_task_rdf_mask()\n + execute_task_rdf_text()\n + execute_task_rdf_completation()\n + execute_all_task()\n\n + use_toy_dataset()", + "autoResize": false, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file