Modified the architecture for BPE
This commit is contained in:
parent
5801a819e9
commit
be8a87ce01
@ -1,362 +0,0 @@
|
||||
{
|
||||
"type": "excalidraw",
|
||||
"version": 2,
|
||||
"source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor",
|
||||
"elements": [
|
||||
{
|
||||
"id": "EcT-dGsjmfW571ov8Gg4F",
|
||||
"type": "text",
|
||||
"x": 425.5,
|
||||
"y": 130,
|
||||
"width": 506,
|
||||
"height": 550,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"4rCC2-N1thmII8_dwNhe1"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a3V",
|
||||
"roundness": null,
|
||||
"seed": 523521109,
|
||||
"version": 758,
|
||||
"versionNonce": 383976373,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758823931674,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesBPE:\n - vocabulary: Vocabulary\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int\n\n - reserve_capacity: float\n - token_length: int\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n \n",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesBPE:\n - vocabulary: Vocabulary\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int\n\n - reserve_capacity: float\n - token_length: int\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n \n",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "74i4oK-JpcM4CgAqhz_x_",
|
||||
"type": "rectangle",
|
||||
"x": 382.5,
|
||||
"y": 104,
|
||||
"width": 592.5,
|
||||
"height": 555,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"4rCC2-N1thmII8_dwNhe1"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a4",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 50827893,
|
||||
"version": 212,
|
||||
"versionNonce": 692313525,
|
||||
"isDeleted": false,
|
||||
"boundElements": null,
|
||||
"updated": 1758822941942,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "s8I1JoKulE3Vnti9a374p",
|
||||
"type": "text",
|
||||
"x": 1113,
|
||||
"y": 128,
|
||||
"width": 440,
|
||||
"height": 250,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"M6w9efVFwOZHkJGgwkyEw"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a5",
|
||||
"roundness": null,
|
||||
"seed": 2091174261,
|
||||
"version": 442,
|
||||
"versionNonce": 1108352309,
|
||||
"isDeleted": false,
|
||||
"boundElements": null,
|
||||
"updated": 1758822765308,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class Vocabulary:\n\n - vocabulary: dict<int, int>\n - reverse_vocabulary: dict<int, int>\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n\n",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class Vocabulary:\n\n - vocabulary: dict<int, int>\n - reverse_vocabulary: dict<int, int>\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n\n",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "BY_Why7XDNftdMzPcwjVZ",
|
||||
"type": "rectangle",
|
||||
"x": 1086.5,
|
||||
"y": 104,
|
||||
"width": 504.5,
|
||||
"height": 260.5,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"M6w9efVFwOZHkJGgwkyEw"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a6",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 153939611,
|
||||
"version": 153,
|
||||
"versionNonce": 1903356469,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "WcDks9DR8UqeZEaxAcRf9",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758822805382,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "JCPDhuTKRx4MN950Q3jL-",
|
||||
"type": "text",
|
||||
"x": 1116.411067193676,
|
||||
"y": 535.1519268774704,
|
||||
"width": 427.72826086956525,
|
||||
"height": 99.70355731225297,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"DbtlKVF_9SjH2-9iMq9zy"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a7",
|
||||
"roundness": null,
|
||||
"seed": 1326854235,
|
||||
"version": 345,
|
||||
"versionNonce": 592556603,
|
||||
"isDeleted": false,
|
||||
"boundElements": null,
|
||||
"updated": 1758822845014,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocrateBPE_BatchMemory:\n\n + max_word_length: int\n + frequency: dict<(int, int), int> ",
|
||||
"fontSize": 19.940711462450594,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocrateBPE_BatchMemory:\n\n + max_word_length: int\n + frequency: dict<(int, int), int> ",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "l-O0rMS3SruV22_MPX9Jz",
|
||||
"type": "rectangle",
|
||||
"x": 1086.5,
|
||||
"y": 509.22900197628456,
|
||||
"width": 504.49999999999994,
|
||||
"height": 154.04199604743084,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"DbtlKVF_9SjH2-9iMq9zy"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a8",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 1490898171,
|
||||
"version": 186,
|
||||
"versionNonce": 1953870555,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758822845014,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "WcDks9DR8UqeZEaxAcRf9",
|
||||
"type": "arrow",
|
||||
"x": 773.5,
|
||||
"y": 167,
|
||||
"width": 298.5,
|
||||
"height": 30,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aB",
|
||||
"roundness": {
|
||||
"type": 2
|
||||
},
|
||||
"seed": 1681364149,
|
||||
"version": 205,
|
||||
"versionNonce": 1154753851,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758823291274,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"points": [
|
||||
[
|
||||
0,
|
||||
0
|
||||
],
|
||||
[
|
||||
144.5,
|
||||
-1.5
|
||||
],
|
||||
[
|
||||
177.5,
|
||||
-30
|
||||
],
|
||||
[
|
||||
298.5,
|
||||
-29.5
|
||||
]
|
||||
],
|
||||
"lastCommittedPoint": null,
|
||||
"startBinding": null,
|
||||
"endBinding": {
|
||||
"elementId": "BY_Why7XDNftdMzPcwjVZ",
|
||||
"focus": 0.7285094931977862,
|
||||
"gap": 14.5
|
||||
},
|
||||
"startArrowhead": null,
|
||||
"endArrowhead": "triangle",
|
||||
"elbowed": false
|
||||
},
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow",
|
||||
"x": 941,
|
||||
"y": 440.7646462573778,
|
||||
"width": 132.9833600541258,
|
||||
"height": 105.33206183359624,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aD",
|
||||
"roundness": {
|
||||
"type": 2
|
||||
},
|
||||
"seed": 1871768059,
|
||||
"version": 402,
|
||||
"versionNonce": 462603541,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758823931675,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"points": [
|
||||
[
|
||||
0,
|
||||
0
|
||||
],
|
||||
[
|
||||
53,
|
||||
8.23535374262218
|
||||
],
|
||||
[
|
||||
63,
|
||||
97.73535374262218
|
||||
],
|
||||
[
|
||||
132.9833600541258,
|
||||
105.33206183359624
|
||||
]
|
||||
],
|
||||
"lastCommittedPoint": null,
|
||||
"startBinding": {
|
||||
"elementId": "EcT-dGsjmfW571ov8Gg4F",
|
||||
"focus": -0.01598303536344995,
|
||||
"gap": 9.500000000000114
|
||||
},
|
||||
"endBinding": {
|
||||
"elementId": "l-O0rMS3SruV22_MPX9Jz",
|
||||
"focus": 0.10931526948750278,
|
||||
"gap": 13.22003639101672
|
||||
},
|
||||
"startArrowhead": null,
|
||||
"endArrowhead": "triangle",
|
||||
"elbowed": false
|
||||
}
|
||||
],
|
||||
"appState": {
|
||||
"gridSize": 20,
|
||||
"gridStep": 5,
|
||||
"gridModeEnabled": false,
|
||||
"viewBackgroundColor": "#ffffff"
|
||||
},
|
||||
"files": {}
|
||||
}
|
||||
658
Project_Model/UML/bpe.excalidraw.json
Normal file
658
Project_Model/UML/bpe.excalidraw.json
Normal file
@ -0,0 +1,658 @@
|
||||
{
|
||||
"type": "excalidraw",
|
||||
"version": 2,
|
||||
"source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor",
|
||||
"elements": [
|
||||
{
|
||||
"id": "EcT-dGsjmfW571ov8Gg4F",
|
||||
"type": "text",
|
||||
"x": 425.5,
|
||||
"y": 132,
|
||||
"width": 506,
|
||||
"height": 425,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"4rCC2-N1thmII8_dwNhe1"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a3V",
|
||||
"roundness": null,
|
||||
"seed": 523521109,
|
||||
"version": 883,
|
||||
"versionNonce": 1590682729,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758881654155,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesBPE(Encoder):\n - vocabulary: Vocabulary\n\n + fit(\n data: [[int]], \n memory: NanoSocratesBPE_BatchMemory,\n last_sentence_chunk: bool, \n last_batch: bool\n ) -> (Self, NanoSocratesBPE_BatchMemory)\n\n + encode(word: [byte]) -> [int]\n\n + decode(token: [int]) -> [byte]\n\n + get_vocabulary_size() -> int\n \n",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "74i4oK-JpcM4CgAqhz_x_",
|
||||
"type": "rectangle",
|
||||
"x": 382.5,
|
||||
"y": 104.5,
|
||||
"width": 592.5,
|
||||
"height": 421,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"4rCC2-N1thmII8_dwNhe1"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a4",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 50827893,
|
||||
"version": 319,
|
||||
"versionNonce": 704459557,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758878226277,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "s8I1JoKulE3Vnti9a374p",
|
||||
"type": "text",
|
||||
"x": 1113.5,
|
||||
"y": 127,
|
||||
"width": 517,
|
||||
"height": 325,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"M6w9efVFwOZHkJGgwkyEw"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a5",
|
||||
"roundness": null,
|
||||
"seed": 2091174261,
|
||||
"version": 480,
|
||||
"versionNonce": 1964948039,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758881941367,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict<int, (int, int)>\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class Vocabulary:\n\n - vocabulary: dict<(int, int), int>\n - reverse_vocabulary: dict<int, (int, int)>\n\n + size -> int\n\n + add_word(int) -> int\n + encode(int) -> int\n + decode(int) -> int\n \n\n",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "BY_Why7XDNftdMzPcwjVZ",
|
||||
"type": "rectangle",
|
||||
"x": 1086.5,
|
||||
"y": 105.5,
|
||||
"width": 593.0000000000001,
|
||||
"height": 325.5,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"M6w9efVFwOZHkJGgwkyEw"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a6",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 153939611,
|
||||
"version": 234,
|
||||
"versionNonce": 2068149129,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "WcDks9DR8UqeZEaxAcRf9",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758881945661,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "JCPDhuTKRx4MN950Q3jL-",
|
||||
"type": "text",
|
||||
"x": 1116.411067193676,
|
||||
"y": 477.3809288774704,
|
||||
"width": 416.74578857421875,
|
||||
"height": 99.70355731225297,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"DbtlKVF_9SjH2-9iMq9zy"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a7",
|
||||
"roundness": null,
|
||||
"seed": 1326854235,
|
||||
"version": 479,
|
||||
"versionNonce": 595084597,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758902358518,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int",
|
||||
"fontSize": 19.940711462450594,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesBPE_BatchMemory:\n\n + frequency: dict<(int, int), int>\n + merge_treshold: int",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "l-O0rMS3SruV22_MPX9Jz",
|
||||
"type": "rectangle",
|
||||
"x": 1086.5,
|
||||
"y": 451.4580039762846,
|
||||
"width": 593,
|
||||
"height": 208.0419960474308,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [
|
||||
"DbtlKVF_9SjH2-9iMq9zy"
|
||||
],
|
||||
"frameId": null,
|
||||
"index": "a8",
|
||||
"roundness": {
|
||||
"type": 3
|
||||
},
|
||||
"seed": 1490898171,
|
||||
"version": 305,
|
||||
"versionNonce": 587306139,
|
||||
"isDeleted": false,
|
||||
"boundElements": [
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow"
|
||||
}
|
||||
],
|
||||
"updated": 1758902358518,
|
||||
"link": null,
|
||||
"locked": false
|
||||
},
|
||||
{
|
||||
"id": "WcDks9DR8UqeZEaxAcRf9",
|
||||
"type": "arrow",
|
||||
"x": 773.5,
|
||||
"y": 167,
|
||||
"width": 297.17936724485867,
|
||||
"height": 30,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aB",
|
||||
"roundness": {
|
||||
"type": 2
|
||||
},
|
||||
"seed": 1681364149,
|
||||
"version": 303,
|
||||
"versionNonce": 1262492265,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758881945661,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"points": [
|
||||
[
|
||||
0,
|
||||
0
|
||||
],
|
||||
[
|
||||
144.5,
|
||||
-1.5
|
||||
],
|
||||
[
|
||||
177.5,
|
||||
-30
|
||||
],
|
||||
[
|
||||
297.17936724485867,
|
||||
-29.020420978562214
|
||||
]
|
||||
],
|
||||
"lastCommittedPoint": null,
|
||||
"startBinding": null,
|
||||
"endBinding": {
|
||||
"elementId": "BY_Why7XDNftdMzPcwjVZ",
|
||||
"focus": 0.77319587628866,
|
||||
"gap": 18.25
|
||||
},
|
||||
"startArrowhead": null,
|
||||
"endArrowhead": "triangle",
|
||||
"elbowed": false
|
||||
},
|
||||
{
|
||||
"id": "OA_NKjb3n3NLtUo_tKmPS",
|
||||
"type": "arrow",
|
||||
"x": 946.0000000000002,
|
||||
"y": 274.95951048200493,
|
||||
"width": 130.016707976343,
|
||||
"height": 209.36808480159067,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aD",
|
||||
"roundness": {
|
||||
"type": 2
|
||||
},
|
||||
"seed": 1871768059,
|
||||
"version": 1039,
|
||||
"versionNonce": 213535035,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758902358519,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"points": [
|
||||
[
|
||||
0,
|
||||
0
|
||||
],
|
||||
[
|
||||
54.99999999999977,
|
||||
12.54048951799507
|
||||
],
|
||||
[
|
||||
69.49999999999977,
|
||||
188.54048951799507
|
||||
],
|
||||
[
|
||||
130.016707976343,
|
||||
209.36808480159067
|
||||
]
|
||||
],
|
||||
"lastCommittedPoint": null,
|
||||
"startBinding": {
|
||||
"elementId": "EcT-dGsjmfW571ov8Gg4F",
|
||||
"focus": -0.48312180762055096,
|
||||
"gap": 14.500000000000114
|
||||
},
|
||||
"endBinding": {
|
||||
"elementId": "l-O0rMS3SruV22_MPX9Jz",
|
||||
"focus": -0.16742658425737647,
|
||||
"gap": 11.194126334166185
|
||||
},
|
||||
"startArrowhead": null,
|
||||
"endArrowhead": "triangle",
|
||||
"elbowed": false
|
||||
},
|
||||
{
|
||||
"id": "snZ__VDsIlri6NTp8M2Gf",
|
||||
"type": "text",
|
||||
"x": -245.25,
|
||||
"y": 103,
|
||||
"width": 330,
|
||||
"height": 125,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aE",
|
||||
"roundness": null,
|
||||
"seed": 1758461093,
|
||||
"version": 265,
|
||||
"versionNonce": 1069481861,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758879566916,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesBPETrainer:\n\n - max_iterations: int\n - max_vocabulary_size: int\n - merge_treshold: int",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "PnbmqwEWYkP8oXElKFyTp",
|
||||
"type": "text",
|
||||
"x": -237.75,
|
||||
"y": 544,
|
||||
"width": 561,
|
||||
"height": 125,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aH",
|
||||
"roundness": null,
|
||||
"seed": 501304683,
|
||||
"version": 241,
|
||||
"versionNonce": 1306401003,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758878748210,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesSplitter:\n + splitter_regex: regex\n\n + split_text(corpus: str) -> [(str, TokenType)]\n",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "xR_11IzgXX5O-m6WoRfCL",
|
||||
"type": "text",
|
||||
"x": -233.25,
|
||||
"y": 366.5,
|
||||
"width": 165,
|
||||
"height": 75,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aI",
|
||||
"roundness": null,
|
||||
"seed": 2025585125,
|
||||
"version": 395,
|
||||
"versionNonce": 1799178985,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758883940168,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "enum TokenType:\n + SPECIAL\n + BPE",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "enum TokenType:\n + SPECIAL\n + BPE",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "lgKSd9qCb94-5e8rd9I3r",
|
||||
"type": "text",
|
||||
"x": -219.75,
|
||||
"y": 764.5,
|
||||
"width": 462,
|
||||
"height": 275,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aJ",
|
||||
"roundness": null,
|
||||
"seed": 1963214021,
|
||||
"version": 422,
|
||||
"versionNonce": 903841927,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758879973600,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class TokeNano:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> int\n\n - encode_bpe(piece: str) -> [int]",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class TokeNano:\n\n - splitter: NanoSocratesSplitter\n - bpe_encoder: NanoSocratesBPE\n - special_encoder: NanoSocratesSpecial\n\n + encode(corpus: str) -> [int]\n\n - encode_special(piece: str) -> int\n\n - encode_bpe(piece: str) -> [int]",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "DwFJoUpVT2YAEe9qPYAXa",
|
||||
"type": "text",
|
||||
"x": 496.75,
|
||||
"y": 666,
|
||||
"width": 440,
|
||||
"height": 100,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aL",
|
||||
"roundness": null,
|
||||
"seed": 1317596203,
|
||||
"version": 152,
|
||||
"versionNonce": 1840679687,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758880107704,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict<str, int>\n + reverse_vocabulary: dict<int, str>",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesSpecial(Encoder):\n\n + vocabulary: dict<str, int>\n + reverse_vocabulary: dict<int, str>",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "78gC46xatoO1_cRtaN8EC",
|
||||
"type": "text",
|
||||
"x": 396.375,
|
||||
"y": -107.75,
|
||||
"width": 346.3997802734375,
|
||||
"height": 100,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aM",
|
||||
"roundness": null,
|
||||
"seed": 1187595241,
|
||||
"version": 128,
|
||||
"versionNonce": 1487192455,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758879825591,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class Encoder(ABC):\n\n + encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 5,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class Encoder(ABC):\n\n + encode(corpus: str) -> [int]\n + decode(encoded: [int]) -> str ",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "3j50Ds74uU7oXoJ9kMOYJ",
|
||||
"type": "text",
|
||||
"x": 457.375,
|
||||
"y": 903.75,
|
||||
"width": 949.7594604492188,
|
||||
"height": 25,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aN",
|
||||
"roundness": null,
|
||||
"seed": 1994335529,
|
||||
"version": 198,
|
||||
"versionNonce": 1492696519,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758882694747,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 5,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "@@mamma@@è bell^^issima e @@^^le voglio molto b^^ene @--@ replit ^^ è molto ^^bello e^^ lo amo",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
},
|
||||
{
|
||||
"id": "yg-TvQvz4MwJZ0y8K7Ix0",
|
||||
"type": "text",
|
||||
"x": 435.375,
|
||||
"y": 1026.25,
|
||||
"width": 352,
|
||||
"height": 250,
|
||||
"angle": 0,
|
||||
"strokeColor": "#1e1e1e",
|
||||
"backgroundColor": "transparent",
|
||||
"fillStyle": "solid",
|
||||
"strokeWidth": 2,
|
||||
"strokeStyle": "solid",
|
||||
"roughness": 1,
|
||||
"opacity": 100,
|
||||
"groupIds": [],
|
||||
"frameId": null,
|
||||
"index": "aP",
|
||||
"roundness": null,
|
||||
"seed": 1877486407,
|
||||
"version": 344,
|
||||
"versionNonce": 25830153,
|
||||
"isDeleted": false,
|
||||
"boundElements": [],
|
||||
"updated": 1758883468886,
|
||||
"link": null,
|
||||
"locked": false,
|
||||
"text": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an iterator\n + read(path: Path) -> str",
|
||||
"fontSize": 20,
|
||||
"fontFamily": 8,
|
||||
"textAlign": "left",
|
||||
"verticalAlign": "top",
|
||||
"containerId": null,
|
||||
"originalText": "class NanoSocratesChunker:\n\n - max_bytes: int\n - max_special_length: int\n - special_token_regex: regex\n\n - residuals: str\n\n # This must be an iterator\n + read(path: Path) -> str",
|
||||
"autoResize": true,
|
||||
"lineHeight": 1.25
|
||||
}
|
||||
],
|
||||
"appState": {
|
||||
"gridSize": 20,
|
||||
"gridStep": 5,
|
||||
"gridModeEnabled": false,
|
||||
"viewBackgroundColor": "#ffffff"
|
||||
},
|
||||
"files": {}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user