{ "cells": [ { "cell_type": "markdown", "id": "762d4737-1be6-4197-8aeb-b26063abaefd", "metadata": {}, "source": [ "# Example Datasets" ] }, { "cell_type": "code", "execution_count": 1, "id": "ba31834b-30e0-4b27-8fbf-9fb6dce778d2", "metadata": {}, "outputs": [], "source": [ "import os\n", "import warnings\n", "\n", "os.environ[\"OMP_NUM_THREADS\"] = \"4\"\n", "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"4\"\n", "os.environ[\"MKL_NUM_THREADS\"] = \"4\"\n", "os.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"4\"\n", "os.environ[\"NUMEXPR_NUM_THREADS\"] = \"4\"\n", "\n", "from grnet.toydata import load_dataset, load_metadata\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from sklearn.manifold import TSNE\n", "\n", "warnings.filterwarnings(\"ignore\", category=RuntimeWarning, module=\"threadpoolctl\")" ] }, { "cell_type": "markdown", "id": "913aaeb1-32a7-4334-aaa0-4226839a2d79", "metadata": {}, "source": [ "The example datasets include the following:\n", "- Count data ($\\log_2(RPM+1)$)\n", "- Metadata" ] }, { "cell_type": "markdown", "id": "36ca1157-7ef6-45ca-b33c-56ba92ba80b8", "metadata": {}, "source": [ "---\n", "## Prototype1" ] }, { "cell_type": "code", "execution_count": 2, "id": "aad4f0cc-4ab5-4805-8f0c-422a38962f84", "metadata": {}, "outputs": [], "source": [ "data1 = load_dataset(\"prototype1\")\n", "meta1 = load_metadata(\"prototype1\")" ] }, { "cell_type": "markdown", "id": "eea72552-7d2f-4004-9a8e-019cdcf6aaa6", "metadata": {}, "source": [ "The count data is given as an NxD matrix (N: number of samples, D: number of genes)" ] }, { "cell_type": "code", "execution_count": 3, "id": "2465f844-5809-4fd9-bdac-48e82deb6313", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | gene_1 | \n", "gene_2 | \n", "gene_3 | \n", "gene_4 | \n", "gene_5 | \n", "gene_6 | \n", "gene_7 | \n", "gene_8 | \n", "gene_9 | \n", "gene_10 | \n", "... | \n", "gene_9991 | \n", "gene_9992 | \n", "gene_9993 | \n", "gene_9994 | \n", "gene_9995 | \n", "gene_9996 | \n", "gene_9997 | \n", "gene_9998 | \n", "gene_9999 | \n", "gene_10000 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sample_1 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "2.753703 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "10.273207 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
| sample_2 | \n", "0.0 | \n", "0.0 | \n", "2.380056 | \n", "0.0 | \n", "0.000000 | \n", "1.633563 | \n", "9.828775 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "2.869546 | \n", "0.000000 | \n", "4.155610 | \n", "0.000000 | \n", "9.673981 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
| sample_3 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.083379 | \n", "0.000000 | \n", "9.719177 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
| sample_4 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "3.832107 | \n", "0.0 | \n", "... | \n", "8.609340 | \n", "6.330085 | \n", "0.000000 | \n", "0.000000 | \n", "3.939771 | \n", "10.232104 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
| sample_5 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "3.034451 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "10.091159 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| sample_996 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "3.380126 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "2.048660 | \n", "
| sample_997 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.194422 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "4.489439 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "3.552277 | \n", "
| sample_998 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "8.068697 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "4.741316 | \n", "
| sample_999 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "1.796236 | \n", "0.000000 | \n", "2.571982 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "3.740458 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "3.985389 | \n", "
| sample_1000 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "7.084269 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "9.423962 | \n", "
1000 rows × 10000 columns
\n", "| \n", " | cluster | \n", "
|---|---|
| sample_1 | \n", "1 | \n", "
| sample_2 | \n", "1 | \n", "
| sample_3 | \n", "1 | \n", "
| sample_4 | \n", "1 | \n", "
| sample_5 | \n", "1 | \n", "
| ... | \n", "... | \n", "
| sample_996 | \n", "5 | \n", "
| sample_997 | \n", "5 | \n", "
| sample_998 | \n", "5 | \n", "
| sample_999 | \n", "5 | \n", "
| sample_1000 | \n", "5 | \n", "
1000 rows × 1 columns
\n", "| \n", " | gene_1 | \n", "gene_2 | \n", "gene_3 | \n", "gene_4 | \n", "gene_5 | \n", "gene_6 | \n", "gene_7 | \n", "gene_8 | \n", "gene_9 | \n", "gene_10 | \n", "... | \n", "gene_9991 | \n", "gene_9992 | \n", "gene_9993 | \n", "gene_9994 | \n", "gene_9995 | \n", "gene_9996 | \n", "gene_9997 | \n", "gene_9998 | \n", "gene_9999 | \n", "gene_10000 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sample_1 | \n", "0.000000 | \n", "3.507129 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "3.507129 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "8.004430 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| sample_2 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "1.715359 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "2.477047 | \n", "4.707172 | \n", "0.000000 | \n", "2.97294 | \n", "0.0 | \n", "8.314904 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| sample_3 | \n", "4.567996 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "4.567996 | \n", "1.941078 | \n", "0.00000 | \n", "0.0 | \n", "7.904205 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| sample_4 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "4.320645 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "8.602809 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| sample_5 | \n", "0.000000 | \n", "8.222258 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "4.526527 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "7.378120 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| sample_1496 | \n", "3.786429 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "3.169765 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "8.647279 | \n", "1.378401 | \n", "
| sample_1497 | \n", "6.193268 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "3.366368 | \n", "0.000000 | \n", "10.656825 | \n", "0.000000 | \n", "
| sample_1498 | \n", "6.028006 | \n", "3.371017 | \n", "0.000000 | \n", "3.001727 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "3.371017 | \n", "0.000000 | \n", "10.925548 | \n", "0.000000 | \n", "
| sample_1499 | \n", "5.144795 | \n", "0.000000 | \n", "1.595626 | \n", "2.821051 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "8.670563 | \n", "... | \n", "2.334715 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "3.473954 | \n", "3.715184 | \n", "9.994661 | \n", "0.000000 | \n", "
| sample_1500 | \n", "4.536216 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.0 | \n", "4.205152 | \n", "0.000000 | \n", "9.952163 | \n", "0.000000 | \n", "
1500 rows × 10000 columns
\n", "| \n", " | cluster | \n", "
|---|---|
| sample_1 | \n", "1 | \n", "
| sample_2 | \n", "1 | \n", "
| sample_3 | \n", "1 | \n", "
| sample_4 | \n", "1 | \n", "
| sample_5 | \n", "1 | \n", "
| ... | \n", "... | \n", "
| sample_1496 | \n", "9 | \n", "
| sample_1497 | \n", "9 | \n", "
| sample_1498 | \n", "9 | \n", "
| sample_1499 | \n", "9 | \n", "
| sample_1500 | \n", "9 | \n", "
1500 rows × 1 columns
\n", "