diff --git a/Projec-Model/UML/bpe.excalidraw.json b/Projec-Model/UML/bpe.excalidraw.json deleted file mode 100644 index 1400c25..0000000 --- a/Projec-Model/UML/bpe.excalidraw.json +++ /dev/null @@ -1,362 +0,0 @@ -{ - "type": "excalidraw", - "version": 2, - "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", - "elements": [ - { - "id": "EcT-dGsjmfW571ov8Gg4F", - "type": "text", - "x": 425.5, - "y": 130, - "width": 506, - "height": 550, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - "4rCC2-N1thmII8_dwNhe1" - ], - "frameId": null, - "index": "a3V", - "roundness": null, - "seed": 523521109, - "version": 758, - "versionNonce": 383976373, - "isDeleted": false, - "boundElements": [ - { - "id": "OA_NKjb3n3NLtUo_tKmPS", - "type": "arrow" - } - ], - "updated": 1758823931674, - "link": null, - "locked": false, - "text": "class NanoSocratesBPE:\n - vocabulary: Vocabulary\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int\n\n - reserve_capacity: float\n - token_length: int\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n \n", - "fontSize": 20, - "fontFamily": 8, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "class NanoSocratesBPE:\n - vocabulary: Vocabulary\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int\n\n - reserve_capacity: float\n - token_length: int\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n \n", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": 
"74i4oK-JpcM4CgAqhz_x_", - "type": "rectangle", - "x": 382.5, - "y": 104, - "width": 592.5, - "height": 555, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - "4rCC2-N1thmII8_dwNhe1" - ], - "frameId": null, - "index": "a4", - "roundness": { - "type": 3 - }, - "seed": 50827893, - "version": 212, - "versionNonce": 692313525, - "isDeleted": false, - "boundElements": null, - "updated": 1758822941942, - "link": null, - "locked": false - }, - { - "id": "s8I1JoKulE3Vnti9a374p", - "type": "text", - "x": 1113, - "y": 128, - "width": 440, - "height": 250, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - "M6w9efVFwOZHkJGgwkyEw" - ], - "frameId": null, - "index": "a5", - "roundness": null, - "seed": 2091174261, - "version": 442, - "versionNonce": 1108352309, - "isDeleted": false, - "boundElements": null, - "updated": 1758822765308, - "link": null, - "locked": false, - "text": "class Vocabulary:\n\n - vocabulary: dict\n - reverse_vocabulary: dict\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n\n", - "fontSize": 20, - "fontFamily": 8, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "class Vocabulary:\n\n - vocabulary: dict\n - reverse_vocabulary: dict\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n\n", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "BY_Why7XDNftdMzPcwjVZ", - "type": "rectangle", - "x": 1086.5, - "y": 104, - "width": 504.5, - "height": 260.5, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - 
"M6w9efVFwOZHkJGgwkyEw" - ], - "frameId": null, - "index": "a6", - "roundness": { - "type": 3 - }, - "seed": 153939611, - "version": 153, - "versionNonce": 1903356469, - "isDeleted": false, - "boundElements": [ - { - "id": "WcDks9DR8UqeZEaxAcRf9", - "type": "arrow" - } - ], - "updated": 1758822805382, - "link": null, - "locked": false - }, - { - "id": "JCPDhuTKRx4MN950Q3jL-", - "type": "text", - "x": 1116.411067193676, - "y": 535.1519268774704, - "width": 427.72826086956525, - "height": 99.70355731225297, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - "DbtlKVF_9SjH2-9iMq9zy" - ], - "frameId": null, - "index": "a7", - "roundness": null, - "seed": 1326854235, - "version": 345, - "versionNonce": 592556603, - "isDeleted": false, - "boundElements": null, - "updated": 1758822845014, - "link": null, - "locked": false, - "text": "class NanoSocrateBPE_BatchMemory:\n\n + max_word_length: int\n + frequency: dict<(int, int), int> ", - "fontSize": 19.940711462450594, - "fontFamily": 8, - "textAlign": "left", - "verticalAlign": "top", - "containerId": null, - "originalText": "class NanoSocrateBPE_BatchMemory:\n\n + max_word_length: int\n + frequency: dict<(int, int), int> ", - "autoResize": true, - "lineHeight": 1.25 - }, - { - "id": "l-O0rMS3SruV22_MPX9Jz", - "type": "rectangle", - "x": 1086.5, - "y": 509.22900197628456, - "width": 504.49999999999994, - "height": 154.04199604743084, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [ - "DbtlKVF_9SjH2-9iMq9zy" - ], - "frameId": null, - "index": "a8", - "roundness": { - "type": 3 - }, - "seed": 1490898171, - "version": 186, - "versionNonce": 1953870555, - "isDeleted": false, - "boundElements": [ - { - "id": "OA_NKjb3n3NLtUo_tKmPS", 
- "type": "arrow" - } - ], - "updated": 1758822845014, - "link": null, - "locked": false - }, - { - "id": "WcDks9DR8UqeZEaxAcRf9", - "type": "arrow", - "x": 773.5, - "y": 167, - "width": 298.5, - "height": 30, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "aB", - "roundness": { - "type": 2 - }, - "seed": 1681364149, - "version": 205, - "versionNonce": 1154753851, - "isDeleted": false, - "boundElements": [], - "updated": 1758823291274, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 144.5, - -1.5 - ], - [ - 177.5, - -30 - ], - [ - 298.5, - -29.5 - ] - ], - "lastCommittedPoint": null, - "startBinding": null, - "endBinding": { - "elementId": "BY_Why7XDNftdMzPcwjVZ", - "focus": 0.7285094931977862, - "gap": 14.5 - }, - "startArrowhead": null, - "endArrowhead": "triangle", - "elbowed": false - }, - { - "id": "OA_NKjb3n3NLtUo_tKmPS", - "type": "arrow", - "x": 941, - "y": 440.7646462573778, - "width": 132.9833600541258, - "height": 105.33206183359624, - "angle": 0, - "strokeColor": "#1e1e1e", - "backgroundColor": "transparent", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "groupIds": [], - "frameId": null, - "index": "aD", - "roundness": { - "type": 2 - }, - "seed": 1871768059, - "version": 402, - "versionNonce": 462603541, - "isDeleted": false, - "boundElements": [], - "updated": 1758823931675, - "link": null, - "locked": false, - "points": [ - [ - 0, - 0 - ], - [ - 53, - 8.23535374262218 - ], - [ - 63, - 97.73535374262218 - ], - [ - 132.9833600541258, - 105.33206183359624 - ] - ], - "lastCommittedPoint": null, - "startBinding": { - "elementId": "EcT-dGsjmfW571ov8Gg4F", - "focus": -0.01598303536344995, - "gap": 9.500000000000114 - }, - "endBinding": { - "elementId": "l-O0rMS3SruV22_MPX9Jz", - "focus": 
0.10931526948750278, - "gap": 13.22003639101672 - }, - "startArrowhead": null, - "endArrowhead": "triangle", - "elbowed": false - } - ], - "appState": { - "gridSize": 20, - "gridStep": 5, - "gridModeEnabled": false, - "viewBackgroundColor": "#ffffff" - }, - "files": {} -} \ No newline at end of file diff --git a/Project_Model/UML/bpe.excalidraw.json b/Project_Model/UML/bpe.excalidraw.json new file mode 100644 index 0000000..d706222 --- /dev/null +++ b/Project_Model/UML/bpe.excalidraw.json @@ -0,0 +1,658 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", + "elements": [ + { + "id": "EcT-dGsjmfW571ov8Gg4F", + "type": "text", + "x": 425.5, + "y": 132, + "width": 506, + "height": 425, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "4rCC2-N1thmII8_dwNhe1" + ], + "frameId": null, + "index": "a3V", + "roundness": null, + "seed": 523521109, + "version": 883, + "versionNonce": 1590682729, + "isDeleted": false, + "boundElements": [ + { + "id": "OA_NKjb3n3NLtUo_tKmPS", + "type": "arrow" + } + ], + "updated": 1758881654155, + "link": null, + "locked": false, + "text": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, 
NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "74i4oK-JpcM4CgAqhz_x_", + "type": "rectangle", + "x": 382.5, + "y": 104.5, + "width": 592.5, + "height": 421, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "4rCC2-N1thmII8_dwNhe1" + ], + "frameId": null, + "index": "a4", + "roundness": { + "type": 3 + }, + "seed": 50827893, + "version": 319, + "versionNonce": 704459557, + "isDeleted": false, + "boundElements": [], + "updated": 1758878226277, + "link": null, + "locked": false + }, + { + "id": "s8I1JoKulE3Vnti9a374p", + "type": "text", + "x": 1113.5, + "y": 127, + "width": 517, + "height": 325, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "M6w9efVFwOZHkJGgwkyEw" + ], + "frameId": null, + "index": "a5", + "roundness": null, + "seed": 2091174261, + "version": 480, + "versionNonce": 1964948039, + "isDeleted": false, + "boundElements": [], + "updated": 1758881941367, + "link": null, + "locked": false, + "text": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "BY_Why7XDNftdMzPcwjVZ", + "type": "rectangle", + 
"x": 1086.5, + "y": 105.5, + "width": 593.0000000000001, + "height": 325.5, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "M6w9efVFwOZHkJGgwkyEw" + ], + "frameId": null, + "index": "a6", + "roundness": { + "type": 3 + }, + "seed": 153939611, + "version": 234, + "versionNonce": 2068149129, + "isDeleted": false, + "boundElements": [ + { + "id": "WcDks9DR8UqeZEaxAcRf9", + "type": "arrow" + } + ], + "updated": 1758881945661, + "link": null, + "locked": false + }, + { + "id": "JCPDhuTKRx4MN950Q3jL-", + "type": "text", + "x": 1116.411067193676, + "y": 477.3809288774704, + "width": 416.74578857421875, + "height": 99.70355731225297, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [ + "DbtlKVF_9SjH2-9iMq9zy" + ], + "frameId": null, + "index": "a7", + "roundness": null, + "seed": 1326854235, + "version": 479, + "versionNonce": 595084597, + "isDeleted": false, + "boundElements": [], + "updated": 1758902358518, + "link": null, + "locked": false, + "text": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int", + "fontSize": 19.940711462450594, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "l-O0rMS3SruV22_MPX9Jz", + "type": "rectangle", + "x": 1086.5, + "y": 451.4580039762846, + "width": 593, + "height": 208.0419960474308, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, +
"groupIds": [ + "DbtlKVF_9SjH2-9iMq9zy" + ], + "frameId": null, + "index": "a8", + "roundness": { + "type": 3 + }, + "seed": 1490898171, + "version": 305, + "versionNonce": 587306139, + "isDeleted": false, + "boundElements": [ + { + "id": "OA_NKjb3n3NLtUo_tKmPS", + "type": "arrow" + } + ], + "updated": 1758902358518, + "link": null, + "locked": false + }, + { + "id": "WcDks9DR8UqeZEaxAcRf9", + "type": "arrow", + "x": 773.5, + "y": 167, + "width": 297.17936724485867, + "height": 30, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aB", + "roundness": { + "type": 2 + }, + "seed": 1681364149, + "version": 303, + "versionNonce": 1262492265, + "isDeleted": false, + "boundElements": [], + "updated": 1758881945661, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 144.5, + -1.5 + ], + [ + 177.5, + -30 + ], + [ + 297.17936724485867, + -29.020420978562214 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": { + "elementId": "BY_Why7XDNftdMzPcwjVZ", + "focus": 0.77319587628866, + "gap": 18.25 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": false + }, + { + "id": "OA_NKjb3n3NLtUo_tKmPS", + "type": "arrow", + "x": 946.0000000000002, + "y": 274.95951048200493, + "width": 130.016707976343, + "height": 209.36808480159067, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 2 + }, + "seed": 1871768059, + "version": 1039, + "versionNonce": 213535035, + "isDeleted": false, + "boundElements": [], + "updated": 1758902358519, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 54.99999999999977, + 
12.54048951799507 + ], + [ + 69.49999999999977, + 188.54048951799507 + ], + [ + 130.016707976343, + 209.36808480159067 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "EcT-dGsjmfW571ov8Gg4F", + "focus": -0.48312180762055096, + "gap": 14.500000000000114 + }, + "endBinding": { + "elementId": "l-O0rMS3SruV22_MPX9Jz", + "focus": -0.16742658425737647, + "gap": 11.194126334166185 + }, + "startArrowhead": null, + "endArrowhead": "triangle", + "elbowed": false + }, + { + "id": "snZ__VDsIlri6NTp8M2Gf", + "type": "text", + "x": -245.25, + "y": 103, + "width": 330, + "height": 125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 1758461093, + "version": 265, + "versionNonce": 1069481861, + "isDeleted": false, + "boundElements": [], + "updated": 1758879566916, + "link": null, + "locked": false, + "text": "class NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "PnbmqwEWYkP8oXElKFyTp", + "type": "text", + "x": -237.75, + "y": 544, + "width": 561, + "height": 125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": null, + "seed": 501304683, + "version": 241, + "versionNonce": 1306401003, + "isDeleted": false, + "boundElements": [], + "updated": 1758878748210, + "link": null, + "locked": 
false, + "text": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xR_11IzgXX5O-m6WoRfCL", + "type": "text", + "x": -233.25, + "y": 366.5, + "width": 165, + "height": 75, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 2025585125, + "version": 395, + "versionNonce": 1799178985, + "isDeleted": false, + "boundElements": [], + "updated": 1758883940168, + "link": null, + "locked": false, + "text": "enum TokenType:\n + SPECIAL\n + BPE", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "enum TokenType:\n + SPECIAL\n + BPE", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "lgKSd9qCb94-5e8rd9I3r", + "type": "text", + "x": -219.75, + "y": 764.5, + "width": 462, + "height": 275, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJ", + "roundness": null, + "seed": 1963214021, + "version": 422, + "versionNonce": 903841927, + "isDeleted": false, + "boundElements": [], + "updated": 1758879973600, + "link": null, + "locked": false, + "text": "class TokeNano:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> 
int\n\n - encode_bpe(piece: str) -> [int]", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class TokeNano:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> int\n\n - encode_bpe(piece: str) -> [int]", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "DwFJoUpVT2YAEe9qPYAXa", + "type": "text", + "x": 496.75, + "y": 666, + "width": 440, + "height": 100, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aL", + "roundness": null, + "seed": 1317596203, + "version": 152, + "versionNonce": 1840679687, + "isDeleted": false, + "boundElements": [], + "updated": 1758880107704, + "link": null, + "locked": false, + "text": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict\n + reverse_vocabulary: dict", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict\n + reverse_vocabulary: dict", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "78gC46xatoO1_cRtaN8EC", + "type": "text", + "x": 396.375, + "y": -107.75, + "width": 346.3997802734375, + "height": 100, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aM", + "roundness": null, + "seed": 1187595241, + "version": 128, + "versionNonce": 1487192455, + "isDeleted": false, + "boundElements": [], + "updated": 1758879825591, + "link": null, + "locked": false, + "text": "class Encoder(ABC):\n\n + 
encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class Encoder(ABC):\n\n + encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "3j50Ds74uU7oXoJ9kMOYJ", + "type": "text", + "x": 457.375, + "y": 903.75, + "width": 949.7594604492188, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aN", + "roundness": null, + "seed": 1994335529, + "version": 198, + "versionNonce": 1492696519, + "isDeleted": false, + "boundElements": [], + "updated": 1758882694747, + "link": null, + "locked": false, + "text": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "yg-TvQvz4MwJZ0y8K7Ix0", + "type": "text", + "x": 435.375, + "y": 1026.25, + "width": 352, + "height": 250, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aP", + "roundness": null, + "seed": 1877486407, + "version": 344, + "versionNonce": 25830153, + "isDeleted": false, + "boundElements": [], + "updated": 1758883468886, + "link": null, + "locked": false, + "text": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an 
iterator\n + read(path: Path) -> str", + "fontSize": 20, + "fontFamily": 8, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an iterator\n + read(path: Path) -> str", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file