From 0a698e9837367de4e42d5b7506ed2a84b4e8f440 Mon Sep 17 00:00:00 2001 From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com> Date: Thu, 25 Sep 2025 19:09:52 +0200 Subject: [PATCH] Added schema to extract from DB for BPE --- .../bpe-pipeline.excalidraw.json | 897 ++++++++++++++++++ 1 file changed, 897 insertions(+) create mode 100644 Scripts/UML/CleaningPipeline/bpe-pipeline.excalidraw.json diff --git a/Scripts/UML/CleaningPipeline/bpe-pipeline.excalidraw.json b/Scripts/UML/CleaningPipeline/bpe-pipeline.excalidraw.json new file mode 100644 index 0000000..0edf3cf --- /dev/null +++ b/Scripts/UML/CleaningPipeline/bpe-pipeline.excalidraw.json @@ -0,0 +1,897 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "3zbCui3XtIGozHXTVAGRp", + "type": "rectangle", + "x": 316.5, + "y": 123, + "width": 436.5, + "height": 145.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a0", + "roundness": { + "type": 3 + }, + "seed": 1698427950, + "version": 35, + "versionNonce": 601575602, + "isDeleted": false, + "boundElements": [ + { + "id": "wD66RDbG05HfvRhAtMb0J", + "type": "text" + }, + { + "id": "gus_rxauKJ6T2L_F59PfN", + "type": "arrow" + } + ], + "updated": 1758818588814, + "link": null, + "locked": false + }, + { + "id": "wD66RDbG05HfvRhAtMb0J", + "type": "text", + "x": 480.98004150390625, + "y": 183.25, + "width": 107.5399169921875, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a1", + "roundness": null, + "seed": 910769774, + "version": 31, + "versionNonce": 1120989938, + "isDeleted": false, + "boundElements": null, + "updated": 1758818416720, + "link": null, + "locked": false, + "text": "dataset.db", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "3zbCui3XtIGozHXTVAGRp", + "originalText": "dataset.db", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "87-MeaiZGT1wln0nggYPZ", + "type": "rectangle", + "x": 339.5, + "y": 309.5, + "width": 392, + "height": 156, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a2", + "roundness": { + "type": 3 + }, + "seed": 655550318, + "version": 77, + "versionNonce": 1103939826, + "isDeleted": false, + "boundElements": null, + "updated": 1758818339000, + "link": null, + "locked": false + }, + { + "id": "EjUxEhZqEBzwvlw0VE9eJ", + "type": "rectangle", + "x": 355.5, + "y": 327, + "width": 162, + "height": 125.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a3", + "roundness": { + "type": 3 + }, + "seed": 1739846638, + "version": 64, + "versionNonce": 1594290034, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ogRkV0neHrhEKTE6zlggl" + } + ], + "updated": 1758818391415, + "link": null, + "locked": false + }, + { + "id": "ogRkV0neHrhEKTE6zlggl", + "type": "text", + "x": 378.7100524902344, + "y": 377.25, + "width": 115.57989501953125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a3V", + "roundness": null, + "seed": 2037675630, + "version": 12, + "versionNonce": 1286472046, + "isDeleted": false, + "boundElements": null, + "updated": 1758818399222, + "link": null, + "locked": false, + "text": "RDF_String", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "EjUxEhZqEBzwvlw0VE9eJ", + "originalText": "RDF_String", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "hoIRMNiMJZl4YDo-hovWy", + "type": "rectangle", + "x": 542.5, + "y": 327, + "width": 173, + "height": 125.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a4", + "roundness": { + "type": 3 + }, + "seed": 1189796530, + "version": 99, + "versionNonce": 1071057006, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rsapATFAT5YSBCXzLupgZ" + }, + { + "id": "gus_rxauKJ6T2L_F59PfN", + "type": "arrow" + }, + { + "id": "Wk1bJbbtC31FqObEL5xWt", + "type": "arrow" + } + ], + "updated": 1758818593647, + "link": null, + "locked": false + }, + { + "id": "rsapATFAT5YSBCXzLupgZ", + "type": "text", + "x": 585.6800384521484, + "y": 377.25, + "width": 86.63992309570312, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a5", + "roundness": null, + "seed": 829619694, + "version": 12, + "versionNonce": 713902318, + "isDeleted": false, + "boundElements": null, + "updated": 1758818405150, + "link": null, + "locked": false, + "text": "Abstract", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "hoIRMNiMJZl4YDo-hovWy", + "originalText": "Abstract", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "jSx8ApfhtRs_nk37VvDMb", + "type": "rectangle", + "x": 316.5, + "y": 511, + "width": 436.5, + "height": 145.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a6", + "roundness": { + "type": 3 + }, + "seed": 492582894, + "version": 132, + "versionNonce": 893797614, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "6E23g-rgowNqHsBxX-LuM" + }, + { + "id": "hyFKqXwet_F79QM71atgI", + "type": "arrow" + }, + { + "id": "x_DP1FcQ7jraGz0gBuDi3", + "type": "arrow" + }, + { + "id": "1IGbCps2EHnzKgJUWM5nq", + "type": "arrow" + }, + { + "id": "Wk1bJbbtC31FqObEL5xWt", + "type": "arrow" + } + ], + "updated": 1758818593647, + "link": null, + "locked": false + }, + { + "id": "6E23g-rgowNqHsBxX-LuM", + "type": "text", + "x": 499.9100341796875, + "y": 571.25, + "width": 69.679931640625, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a7", + "roundness": null, + "seed": 267696178, + "version": 132, + "versionNonce": 1668243186, + "isDeleted": false, + "boundElements": null, + "updated": 1758818543211, + "link": null, + "locked": false, + "text": "Pandas", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "jSx8ApfhtRs_nk37VvDMb", + "originalText": "Pandas", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ohj18N4AOTDz5lJNcV9gi", + "type": "rectangle", + "x": 261, + "y": 765.5, + "width": 157, + "height": 87, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a8", + "roundness": { + "type": 3 + }, + "seed": 1446207150, + "version": 279, + "versionNonce": 317375026, + "isDeleted": false, + "boundElements": [ + { + "id": "Ea1_ke2wA0D8ZjVOUtvfY", + "type": "text" + }, + { + "id": "hyFKqXwet_F79QM71atgI", + "type": "arrow" + } + ], + "updated": 1758818570993, + "link": null, + "locked": false + }, + { + "id": "Ea1_ke2wA0D8ZjVOUtvfY", + "type": "text", + "x": 297.0800323486328, + "y": 796.5, + "width": 84.83993530273438, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "a9", + "roundness": null, + "seed": 435116270, + "version": 199, + "versionNonce": 1282911218, + "isDeleted": false, + "boundElements": null, + "updated": 1758818570993, + "link": null, + "locked": false, + "text": "train.txt", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ohj18N4AOTDz5lJNcV9gi", + "originalText": "train.txt", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "A4Y54Y26fe257U_QU9lxX", + "type": "rectangle", + "x": 464, + "y": 765.5, + "width": 157, + "height": 87, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aA", + "roundness": { + "type": 3 + }, + "seed": 186148850, + "version": 232, + "versionNonce": 997119858, + "isDeleted": false, + "boundElements": [ + { + "id": "v4TvUlDEjH7EvPDmtbOn2", + "type": "text" + }, + { + "id": "1IGbCps2EHnzKgJUWM5nq", + "type": "arrow" + } + ], + "updated": 1758818570993, + "link": null, + "locked": false + }, + { + "id": "v4TvUlDEjH7EvPDmtbOn2", + "type": "text", + "x": 476.3500442504883, + "y": 796.5, + "width": 132.29991149902344, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aB", + "roundness": null, + "seed": 1131059634, + "version": 171, + "versionNonce": 239540530, + "isDeleted": false, + "boundElements": null, + "updated": 1758818570993, + "link": null, + "locked": false, + "text": "validation.txt", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "A4Y54Y26fe257U_QU9lxX", + "originalText": "validation.txt", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "mPaYpJ9Xn7tlJPmKPqJKJ", + "type": "rectangle", + "x": 674.5, + "y": 765.5, + "width": 157, + "height": 87, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aC", + "roundness": { + "type": 3 + }, + "seed": 1049323314, + "version": 235, + "versionNonce": 330560690, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "kg9nm2rpud6cax5aNPSnu" + }, + { + "id": "x_DP1FcQ7jraGz0gBuDi3", + "type": "arrow" + } + ], + "updated": 1758818570993, + "link": null, + "locked": false + }, + { + "id": "kg9nm2rpud6cax5aNPSnu", + "type": "text", + "x": 711.4300231933594, + "y": 796.5, + "width": 83.13995361328125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": null, + "seed": 522572142, + "version": 193, + "versionNonce": 1920372338, + "isDeleted": false, + "boundElements": null, + "updated": 1758818570993, + "link": null, + "locked": false, + "text": "test.txt", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "mPaYpJ9Xn7tlJPmKPqJKJ", + "originalText": "test.txt", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "hyFKqXwet_F79QM71atgI", + "type": "arrow", + "x": 534.65, + "y": 661.5, + "width": 195.25, + "height": 99, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 873266098, + "version": 71, + "versionNonce": 541154738, + "isDeleted": false, + "boundElements": null, + "updated": 1758818570993, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49.5 + ], + [ + -195.25, + 49.5 + ], + [ + -195.25, + 99 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "jSx8ApfhtRs_nk37VvDMb", + "fixedPoint": [ + 0.49977090492554405, + 1.034364261168385 + ], + "focus": 0, + "gap": 0 + }, + "endBinding": { + "elementId": "ohj18N4AOTDz5lJNcV9gi", + "fixedPoint": [ + 0.4993630573248406, + -0.05747126436781609 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "x_DP1FcQ7jraGz0gBuDi3", + "type": "arrow", + "x": 534.65, + "y": 661.5, + "width": 218.25, + "height": 99, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": null, + "seed": 1210817582, + "version": 77, + "versionNonce": 1483392370, + "isDeleted": false, + "boundElements": null, + "updated": 1758818580594, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49.5 + ], + [ + 218.25, + 49.5 + ], + [ + 218.25, + 99 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "jSx8ApfhtRs_nk37VvDMb", + "fixedPoint": [ + 0.49977090492554405, + 1.034364261168385 + ], + "focus": 0, + "gap": 0 + }, + "endBinding": { + "elementId": "mPaYpJ9Xn7tlJPmKPqJKJ", + "fixedPoint": [ + 0.4993630573248406, + -0.05747126436781609 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "1IGbCps2EHnzKgJUWM5nq", + "type": "arrow", + "x": 534.65, + "y": 661.5, + "width": 0.5719232650604908, + "height": 99.07394122590165, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aK", + "roundness": null, + "seed": 1205316658, + "version": 96, + "versionNonce": 1748050674, + "isDeleted": false, + "boundElements": null, + "updated": 1758818570993, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -0.5719232650604908, + 99.07394122590165 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "jSx8ApfhtRs_nk37VvDMb", + "fixedPoint": [ + 0.49977090492554405, + 1.034364261168385 + ], + "focus": 0, + "gap": 0 + }, + "endBinding": { + "elementId": "A4Y54Y26fe257U_QU9lxX", + "fixedPoint": [ + 0.44635717665566554, + -0.056621365219521276 + ], + "focus": 0, + "gap": 0 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": true, + "fixedSegments": null, + "startIsSpecial": null, + "endIsSpecial": null + }, + { + "id": "gus_rxauKJ6T2L_F59PfN", + "type": "arrow", + "x": 539, + "y": 271.5, + "width": 0, + "height": 33.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aL", + "roundness": null, + "seed": 763990258, + "version": 17, + "versionNonce": 1028811378, + "isDeleted": false, + "boundElements": null, + "updated": 1758818588814, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 33.5 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "3zbCui3XtIGozHXTVAGRp", + "focus": -0.019473081328751418, + "gap": 3 + }, + "endBinding": { + "elementId": "hoIRMNiMJZl4YDo-hovWy", + "focus": -1.0404624277456647, + "gap": 30.7545797799829 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": false + }, + { + "id": "Wk1bJbbtC31FqObEL5xWt", + "type": "arrow", + "x": 536.5, + "y": 468.5, + "width": 0, + "height": 39, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aM", + "roundness": null, + "seed": 1489771054, + "version": 33, + "versionNonce": 1828178606, + "isDeleted": false, + "boundElements": null, + "updated": 1758818593647, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 39 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "hoIRMNiMJZl4YDo-hovWy", + "focus": 1.0693641618497107, + "gap": 27.157190169432425 + }, + "endBinding": { + "elementId": "jSx8ApfhtRs_nk37VvDMb", + "focus": 0.008018327605956525, + "gap": 3.5 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": false + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file