|
10 | 10 | "This tutorial assumes you have read introductory material on\n", |
11 | 11 | "https://github.com/INCATools/semantic-sql\n", |
12 | 12 | "\n", |
13 | | - "\n" |
| 13 | + "To run this notebook, we recommend cloning the repo and installing all dependencies via poetry:\n", |
| 14 | + "\n", |
| 15 | + "```\n", |
| 16 | + "git clone https://github.com/INCATools/semantic-sql.git\n", |
| 17 | + "cd semantic-sql\n", |
| 18 | + "poetry install\n", |
| 19 | + "poetry run jupyter notebook\n", |
| 20 | + "```\n" |
14 | 21 | ] |
15 | 22 | }, |
16 | 23 | { |
|
45 | 52 | }, |
46 | 53 | { |
47 | 54 | "cell_type": "code", |
48 | | - "execution_count": 41, |
| 55 | + "execution_count": 5, |
49 | 56 | "id": "9f75b98c", |
50 | 57 | "metadata": {}, |
51 | 58 | "outputs": [ |
52 | 59 | { |
53 | 60 | "name": "stdout", |
54 | 61 | "output_type": "stream", |
55 | 62 | "text": [ |
56 | | - "--2022-08-15 17:46:55-- https://s3.amazonaws.com/bbop-sqlite/cl.db.gz\n", |
57 | | - "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.81.11\n", |
58 | | - "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.81.11|:443... connected.\n", |
59 | | - "HTTP request sent, awaiting response... 200 OK\n", |
60 | | - "Length: 124417954 (119M) [binary/octet-stream]\n", |
61 | | - "Saving to: ‘cl.db.gz’\n", |
62 | | - "\n", |
63 | | - "cl.db.gz 100%[===================>] 118.65M 5.89MB/s in 29s \n", |
64 | | - "\n", |
65 | | - "2022-08-15 17:47:25 (4.06 MB/s) - ‘cl.db.gz’ saved [124417954/124417954]\n", |
| 63 | + "--2022-08-16 09:49:00-- https://s3.amazonaws.com/bbop-sqlite/cl.db.gz\n", |
| 64 | + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 3.5.16.140\n", |
| 65 | + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|3.5.16.140|:443... connected.\n", |
| 66 | + "HTTP request sent, awaiting response... 304 Not Modified\n", |
| 67 | + "File ‘cl.db.gz’ not modified on server. Omitting download.\n", |
66 | 68 | "\n" |
67 | 69 | ] |
68 | 70 | } |
69 | 71 | ], |
70 | 72 | "source": [ |
71 | | - "!wget https://s3.amazonaws.com/bbop-sqlite/cl.db.gz -O cl.db.gz" |
| 73 | + "!wget -N https://s3.amazonaws.com/bbop-sqlite/cl.db.gz" |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "cell_type": "markdown", |
| 78 | + "id": "2e6d48f5", |
| 79 | + "metadata": {}, |
| 80 | + "source": [ |
| 81 | + "Next, decompress it (keep the original around with `-k` so that subsequent runs of this notebook work)" |
72 | 82 | ] |
73 | 83 | }, |
74 | 84 | { |
75 | 85 | "cell_type": "code", |
76 | | - "execution_count": 42, |
| 86 | + "execution_count": 6, |
77 | 87 | "id": "f2e775f8", |
78 | 88 | "metadata": {}, |
79 | 89 | "outputs": [], |
80 | 90 | "source": [ |
81 | | - "!gzip -d cl.db.gz" |
| 91 | + "!gzip -dfk cl.db.gz" |
82 | 92 | ] |
83 | 93 | }, |
84 | 94 | { |
85 | 95 | "cell_type": "markdown", |
86 | 96 | "id": "d3cb4a21", |
87 | 97 | "metadata": {}, |
88 | 98 | "source": [ |
89 | | - "This is quite large so it make take a few minutes depending on your connection.\n", |
| 99 | + "The initial download is quite large so it make take a minute or so depending on your connection.\n", |
| 100 | + "\n", |
| 101 | + "### Connecting\n", |
90 | 102 | "\n", |
91 | 103 | "next, some Jupyter magic to connect to cl.db\n", |
92 | 104 | "\n", |
|
131 | 143 | }, |
132 | 144 | { |
133 | 145 | "cell_type": "code", |
134 | | - "execution_count": 4, |
| 146 | + "execution_count": 7, |
135 | 147 | "id": "305a4293", |
136 | 148 | "metadata": {}, |
137 | 149 | "outputs": [ |
|
261 | 273 | " ('CL:0000101', 'CL:0000101', 'rdfs:label', None, 'sensory neuron', 'xsd:string', None)]" |
262 | 274 | ] |
263 | 275 | }, |
264 | | - "execution_count": 4, |
| 276 | + "execution_count": 7, |
265 | 277 | "metadata": {}, |
266 | 278 | "output_type": "execute_result" |
267 | 279 | } |
|
294 | 306 | }, |
295 | 307 | { |
296 | 308 | "cell_type": "code", |
297 | | - "execution_count": 7, |
| 309 | + "execution_count": 8, |
298 | 310 | "id": "5207ce6c", |
299 | 311 | "metadata": {}, |
300 | 312 | "outputs": [ |
|
434 | 446 | " ('CL:0007011', 'CL:0007011', 'rdfs:subClassOf', 'CL:0000029', None, None, None)]" |
435 | 447 | ] |
436 | 448 | }, |
437 | | - "execution_count": 7, |
| 449 | + "execution_count": 8, |
438 | 450 | "metadata": {}, |
439 | 451 | "output_type": "execute_result" |
440 | 452 | } |
|
481 | 493 | }, |
482 | 494 | { |
483 | 495 | "cell_type": "code", |
484 | | - "execution_count": 6, |
| 496 | + "execution_count": 9, |
485 | 497 | "id": "e9700bea", |
486 | 498 | "metadata": {}, |
487 | 499 | "outputs": [ |
|
537 | 549 | " ('CL:0007011', 'rdfs:subClassOf', 'CL:0000107')]" |
538 | 550 | ] |
539 | 551 | }, |
540 | | - "execution_count": 6, |
| 552 | + "execution_count": 9, |
541 | 553 | "metadata": {}, |
542 | 554 | "output_type": "execute_result" |
543 | 555 | } |
|
558 | 570 | }, |
559 | 571 | { |
560 | 572 | "cell_type": "code", |
561 | | - "execution_count": 6, |
| 573 | + "execution_count": 11, |
562 | 574 | "id": "8a5dd674", |
563 | 575 | "metadata": {}, |
564 | 576 | "outputs": [ |
|
642 | 654 | " ('CL:0007011', 'rdfs:subClassOf', 'CL:0000107', None, 'autonomic neuron')]" |
643 | 655 | ] |
644 | 656 | }, |
645 | | - "execution_count": 6, |
| 657 | + "execution_count": 11, |
646 | 658 | "metadata": {}, |
647 | 659 | "output_type": "execute_result" |
648 | 660 | } |
|
688 | 700 | }, |
689 | 701 | { |
690 | 702 | "cell_type": "code", |
691 | | - "execution_count": 11, |
| 703 | + "execution_count": 13, |
692 | 704 | "id": "e4d791ab", |
693 | 705 | "metadata": {}, |
694 | 706 | "outputs": [ |
|
834 | 846 | " ('CL:0007011', 'RO:0002100', 'BFO:0000040')]" |
835 | 847 | ] |
836 | 848 | }, |
837 | | - "execution_count": 11, |
| 849 | + "execution_count": 13, |
838 | 850 | "metadata": {}, |
839 | 851 | "output_type": "execute_result" |
840 | 852 | } |
|
857 | 869 | }, |
858 | 870 | { |
859 | 871 | "cell_type": "code", |
860 | | - "execution_count": 40, |
| 872 | + "execution_count": 14, |
861 | 873 | "id": "e13226a1", |
862 | 874 | "metadata": {}, |
863 | 875 | "outputs": [ |
|
982 | 994 | " ('CL:2000086', 'neocortex basket cell')]" |
983 | 995 | ] |
984 | 996 | }, |
985 | | - "execution_count": 40, |
| 997 | + "execution_count": 14, |
986 | 998 | "metadata": {}, |
987 | 999 | "output_type": "execute_result" |
988 | 1000 | } |
|
1016 | 1028 | }, |
1017 | 1029 | { |
1018 | 1030 | "cell_type": "code", |
1019 | | - "execution_count": 43, |
| 1031 | + "execution_count": 16, |
1020 | 1032 | "id": "c9cfcbdd", |
1021 | 1033 | "metadata": {}, |
1022 | 1034 | "outputs": [ |
|
1044 | 1056 | "[(29117,)]" |
1045 | 1057 | ] |
1046 | 1058 | }, |
1047 | | - "execution_count": 43, |
| 1059 | + "execution_count": 16, |
1048 | 1060 | "metadata": {}, |
1049 | 1061 | "output_type": "execute_result" |
1050 | 1062 | } |
|
1122 | 1134 | }, |
1123 | 1135 | { |
1124 | 1136 | "cell_type": "code", |
1125 | | - "execution_count": 7, |
| 1137 | + "execution_count": 17, |
1126 | 1138 | "id": "e1fa05a9", |
1127 | 1139 | "metadata": {}, |
1128 | 1140 | "outputs": [], |
|
1132 | 1144 | }, |
1133 | 1145 | { |
1134 | 1146 | "cell_type": "code", |
1135 | | - "execution_count": 10, |
| 1147 | + "execution_count": 18, |
1136 | 1148 | "id": "e1d7b90c", |
1137 | 1149 | "metadata": {}, |
1138 | 1150 | "outputs": [], |
|
1155 | 1167 | }, |
1156 | 1168 | { |
1157 | 1169 | "cell_type": "code", |
1158 | | - "execution_count": 12, |
| 1170 | + "execution_count": 20, |
1159 | 1171 | "id": "753403a0", |
1160 | 1172 | "metadata": {}, |
1161 | 1173 | "outputs": [ |
|
1188 | 1200 | }, |
1189 | 1201 | { |
1190 | 1202 | "cell_type": "code", |
1191 | | - "execution_count": 13, |
| 1203 | + "execution_count": 21, |
1192 | 1204 | "id": "14c2663b", |
1193 | 1205 | "metadata": {}, |
1194 | 1206 | "outputs": [], |
|
1198 | 1210 | }, |
1199 | 1211 | { |
1200 | 1212 | "cell_type": "code", |
1201 | | - "execution_count": 14, |
| 1213 | + "execution_count": 22, |
1202 | 1214 | "id": "8bfc77cd", |
1203 | 1215 | "metadata": {}, |
1204 | 1216 | "outputs": [], |
|
1208 | 1220 | }, |
1209 | 1221 | { |
1210 | 1222 | "cell_type": "code", |
1211 | | - "execution_count": 16, |
| 1223 | + "execution_count": 23, |
1212 | 1224 | "id": "a7ca1296", |
1213 | 1225 | "metadata": {}, |
1214 | 1226 | "outputs": [ |
|
1253 | 1265 | }, |
1254 | 1266 | { |
1255 | 1267 | "cell_type": "code", |
1256 | | - "execution_count": 22, |
| 1268 | + "execution_count": 24, |
1257 | 1269 | "id": "28bdc16d", |
1258 | 1270 | "metadata": {}, |
1259 | 1271 | "outputs": [], |
|
1264 | 1276 | }, |
1265 | 1277 | { |
1266 | 1278 | "cell_type": "code", |
1267 | | - "execution_count": 23, |
| 1279 | + "execution_count": 25, |
1268 | 1280 | "id": "88d75a8d", |
1269 | 1281 | "metadata": {}, |
1270 | 1282 | "outputs": [], |
|
1286 | 1298 | }, |
1287 | 1299 | { |
1288 | 1300 | "cell_type": "code", |
1289 | | - "execution_count": 27, |
| 1301 | + "execution_count": 26, |
1290 | 1302 | "id": "5884e222", |
1291 | 1303 | "metadata": {}, |
1292 | 1304 | "outputs": [ |
|
1319 | 1331 | }, |
1320 | 1332 | { |
1321 | 1333 | "cell_type": "code", |
1322 | | - "execution_count": 33, |
| 1334 | + "execution_count": 27, |
1323 | 1335 | "id": "7f80c503", |
1324 | 1336 | "metadata": {}, |
1325 | 1337 | "outputs": [ |
|
1350 | 1362 | }, |
1351 | 1363 | { |
1352 | 1364 | "cell_type": "code", |
1353 | | - "execution_count": 35, |
| 1365 | + "execution_count": 28, |
1354 | 1366 | "id": "6d5749c9", |
1355 | 1367 | "metadata": {}, |
1356 | 1368 | "outputs": [ |
|
1399 | 1411 | }, |
1400 | 1412 | { |
1401 | 1413 | "cell_type": "code", |
1402 | | - "execution_count": 28, |
| 1414 | + "execution_count": 29, |
1403 | 1415 | "id": "58709734", |
1404 | 1416 | "metadata": {}, |
1405 | 1417 | "outputs": [ |
|
1447 | 1459 | " | ----------------------------------------------------------------------\n", |
1448 | 1460 | " | Data and other attributes defined here:\n", |
1449 | 1461 | " | \n", |
1450 | | - " | __mapper__ = <Mapper at 0x7fdfe078cee0; Edge>\n", |
| 1462 | + " | __mapper__ = <Mapper at 0x7ff21866fa90; Edge>\n", |
1451 | 1463 | " | \n", |
1452 | 1464 | " | __mapper_args__ = {'concrete': True}\n", |
1453 | 1465 | " | \n", |
|
0 commit comments