{ "type": "excalidraw", "version": 2, "source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor", "elements": [ { "id": "EcT-dGsjmfW571ov8Gg4F", "type": "text", "x": 425.5, "y": 132, "width": 506, "height": 425, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "4rCC2-N1thmII8_dwNhe1" ], "frameId": null, "index": "a3V", "roundness": null, "seed": 523521109, "version": 883, "versionNonce": 1590682729, "isDeleted": false, "boundElements": [ { "id": "OA_NKjb3n3NLtUo_tKmPS", "type": "arrow" } ], "updated": 1758881654155, "link": null, "locked": false, "text": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n", "autoResize": true, "lineHeight": 1.25 }, { "id": "74i4oK-JpcM4CgAqhz_x_", "type": "rectangle", "x": 382.5, "y": 104.5, "width": 592.5, "height": 421, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "4rCC2-N1thmII8_dwNhe1" ], "frameId": null, "index": "a4", "roundness": { "type": 3 }, "seed": 50827893, "version": 319, "versionNonce": 704459557, 
"isDeleted": false, "boundElements": [], "updated": 1758878226277, "link": null, "locked": false }, { "id": "s8I1JoKulE3Vnti9a374p", "type": "text", "x": 1113.5, "y": 127, "width": 517, "height": 325, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "M6w9efVFwOZHkJGgwkyEw" ], "frameId": null, "index": "a5", "roundness": null, "seed": 2091174261, "version": 480, "versionNonce": 1964948039, "isDeleted": false, "boundElements": [], "updated": 1758881941367, "link": null, "locked": false, "text": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n", "autoResize": true, "lineHeight": 1.25 }, { "id": "BY_Why7XDNftdMzPcwjVZ", "type": "rectangle", "x": 1086.5, "y": 105.5, "width": 593.0000000000001, "height": 325.5, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "M6w9efVFwOZHkJGgwkyEw" ], "frameId": null, "index": "a6", "roundness": { "type": 3 }, "seed": 153939611, "version": 234, "versionNonce": 2068149129, "isDeleted": false, "boundElements": [ { "id": "WcDks9DR8UqeZEaxAcRf9", "type": "arrow" } ], "updated": 1758881945661, "link": null, "locked": false }, { "id": "JCPDhuTKRx4MN950Q3jL-", "type": "text", "x": 1116.411067193676, "y": 477.3809288774704, "width": 416.74578857421875, "height": 99.70355731225297, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", 
"fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "DbtlKVF_9SjH2-9iMq9zy" ], "frameId": null, "index": "a7", "roundness": null, "seed": 1326854235, "version": 479, "versionNonce": 595084597, "isDeleted": false, "boundElements": [], "updated": 1758902358518, "link": null, "locked": false, "text": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int", "fontSize": 19.940711462450594, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int", "autoResize": true, "lineHeight": 1.25 }, { "id": "l-O0rMS3SruV22_MPX9Jz", "type": "rectangle", "x": 1086.5, "y": 451.4580039762846, "width": 593, "height": 208.0419960474308, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [ "DbtlKVF_9SjH2-9iMq9zy" ], "frameId": null, "index": "a8", "roundness": { "type": 3 }, "seed": 1490898171, "version": 305, "versionNonce": 587306139, "isDeleted": false, "boundElements": [ { "id": "OA_NKjb3n3NLtUo_tKmPS", "type": "arrow" } ], "updated": 1758902358518, "link": null, "locked": false }, { "id": "WcDks9DR8UqeZEaxAcRf9", "type": "arrow", "x": 773.5, "y": 167, "width": 297.17936724485867, "height": 30, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aB", "roundness": { "type": 2 }, "seed": 1681364149, "version": 303, "versionNonce": 1262492265, "isDeleted": false, "boundElements": [], "updated": 1758881945661, "link": null, "locked": false, "points": [ [ 0, 0 ], [ 144.5, -1.5 ], [ 177.5, -30 ], [ 297.17936724485867, -29.020420978562214 ] ], "lastCommittedPoint": null, 
"startBinding": null, "endBinding": { "elementId": "BY_Why7XDNftdMzPcwjVZ", "focus": 0.77319587628866, "gap": 18.25 }, "startArrowhead": null, "endArrowhead": "triangle", "elbowed": false }, { "id": "OA_NKjb3n3NLtUo_tKmPS", "type": "arrow", "x": 946.0000000000002, "y": 274.95951048200493, "width": 130.016707976343, "height": 209.36808480159067, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aD", "roundness": { "type": 2 }, "seed": 1871768059, "version": 1039, "versionNonce": 213535035, "isDeleted": false, "boundElements": [], "updated": 1758902358519, "link": null, "locked": false, "points": [ [ 0, 0 ], [ 54.99999999999977, 12.54048951799507 ], [ 69.49999999999977, 188.54048951799507 ], [ 130.016707976343, 209.36808480159067 ] ], "lastCommittedPoint": null, "startBinding": { "elementId": "EcT-dGsjmfW571ov8Gg4F", "focus": -0.48312180762055096, "gap": 14.500000000000114 }, "endBinding": { "elementId": "l-O0rMS3SruV22_MPX9Jz", "focus": -0.16742658425737647, "gap": 11.194126334166185 }, "startArrowhead": null, "endArrowhead": "triangle", "elbowed": false }, { "id": "snZ__VDsIlri6NTp8M2Gf", "type": "text", "x": -245.25, "y": 103, "width": 330, "height": 125, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aE", "roundness": null, "seed": 1758461093, "version": 265, "versionNonce": 1069481861, "isDeleted": false, "boundElements": [], "updated": 1758879566916, "link": null, "locked": false, "text": "class NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class 
NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int", "autoResize": true, "lineHeight": 1.25 }, { "id": "PnbmqwEWYkP8oXElKFyTp", "type": "text", "x": -237.75, "y": 544, "width": 561, "height": 125, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aH", "roundness": null, "seed": 501304683, "version": 241, "versionNonce": 1306401003, "isDeleted": false, "boundElements": [], "updated": 1758878748210, "link": null, "locked": false, "text": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n", "autoResize": true, "lineHeight": 1.25 }, { "id": "xR_11IzgXX5O-m6WoRfCL", "type": "text", "x": -233.25, "y": 366.5, "width": 165, "height": 75, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aI", "roundness": null, "seed": 2025585125, "version": 395, "versionNonce": 1799178985, "isDeleted": false, "boundElements": [], "updated": 1758883940168, "link": null, "locked": false, "text": "enum TokenType:\n + SPECIAL\n + BPE", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "enum TokenType:\n + SPECIAL\n + BPE", "autoResize": true, "lineHeight": 1.25 }, { "id": "lgKSd9qCb94-5e8rd9I3r", "type": "text", "x": -219.75, "y": 764.5, "width": 462, "height": 275, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, 
"strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aJ", "roundness": null, "seed": 1963214021, "version": 464, "versionNonce": 1104453739, "isDeleted": false, "boundElements": [], "updated": 1759053302739, "link": null, "locked": false, "text": "class TokeNanoCore:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> int\n\n - encode_bpe(piece: str) -> [int]", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class TokeNanoCore:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> int\n\n - encode_bpe(piece: str) -> [int]", "autoResize": true, "lineHeight": 1.25 }, { "id": "DwFJoUpVT2YAEe9qPYAXa", "type": "text", "x": 496.75, "y": 666, "width": 440, "height": 100, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aL", "roundness": null, "seed": 1317596203, "version": 152, "versionNonce": 1840679687, "isDeleted": false, "boundElements": [], "updated": 1758880107704, "link": null, "locked": false, "text": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict\n + reverse_vocabulary: dict", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict\n + reverse_vocabulary: dict", "autoResize": true, "lineHeight": 1.25 }, { "id": "78gC46xatoO1_cRtaN8EC", "type": "text", "x": 396.375, "y": -107.75, "width": 396, "height": 100, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", 
"strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aM", "roundness": null, "seed": 1187595241, "version": 130, "versionNonce": 1273030504, "isDeleted": false, "boundElements": [], "updated": 1759070012771, "link": null, "locked": false, "text": "class Encoder(ABC):\n\n + encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class Encoder(ABC):\n\n + encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ", "autoResize": true, "lineHeight": 1.25 }, { "id": "3j50Ds74uU7oXoJ9kMOYJ", "type": "text", "x": 457.375, "y": 903.75, "width": 949.7594604492188, "height": 25, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aN", "roundness": null, "seed": 1994335529, "version": 198, "versionNonce": 1492696519, "isDeleted": false, "boundElements": [], "updated": 1758882694747, "link": null, "locked": false, "text": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo", "fontSize": 20, "fontFamily": 5, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo", "autoResize": true, "lineHeight": 1.25 }, { "id": "yg-TvQvz4MwJZ0y8K7Ix0", "type": "text", "x": 435.375, "y": 1026.25, "width": 352, "height": 250, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aP", "roundness": null, "seed": 1877486407, "version": 344, "versionNonce": 25830153, "isDeleted": false, "boundElements": [], "updated": 1758883468886, 
"link": null, "locked": false, "text": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an iterator\n + read(path: Path) -> str", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an iterator\n + read(path: Path) -> str", "autoResize": true, "lineHeight": 1.25 }, { "id": "2UXjWdE_jMcsCE2oQgTXn", "type": "text", "x": -334.75, "y": 1112.5, "width": 165, "height": 25, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "frameId": null, "index": "aQ", "roundness": null, "seed": 700532363, "version": 76, "versionNonce": 1671597672, "isDeleted": false, "boundElements": [], "updated": 1759070020002, "link": null, "locked": false, "text": "class TokeNano:", "fontSize": 20, "fontFamily": 8, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "class TokeNano:", "autoResize": true, "lineHeight": 1.25 } ], "appState": { "gridSize": 20, "gridStep": 5, "gridModeEnabled": false, "viewBackgroundColor": "#ffffff" }, "files": {} }