From 69b7cd6a99fca5af95e07dfee83dae6e5c2c73fa Mon Sep 17 00:00:00 2001 From: GitJournal Date: Mon, 8 Jan 2024 02:22:53 +0800 Subject: [PATCH 01/28] update1 --- .fdignore | 5 + docs/.gitignore | 4 + docs/codeview.html | 522 +++++++++++++ docs/data/0.json | 290 +++++++ .../0c3fc06c-6184-4357-85d6-348275dd29c6.json | 25 + .../2702f9ef-853c-453e-a9d3-52a7f948e47d.json | 10 + .../6a64bc63-97f1-469f-8134-72ca11c14704.json | 10 + .../6e2219bf-c085-433e-9e45-71ef30ea0a5a.json | 10 + .../ba82746b-979a-400b-9ee9-a0c71b845a6b.json | 25 + .../bed0c727-b3e6-471e-9e70-c52fd603fa0d.json | 15 + .../c73a6545-b180-4570-aada-92761b8c15d5.json | 15 + .../d4fbdf63-7c60-4310-adf4-002231b4814d.json | 10 + .../ee2a8a01-bbe2-4f6a-959c-188fbbe67bbf.json | 15 + docs/index.html | 724 ++++++++++++++++++ docs/metadata.json | 55 ++ docs/src/README.md | 11 + docs/src/dall_e/__init__.py | 18 + docs/src/dall_e/decoder.py | 94 +++ docs/src/dall_e/encoder.py | 93 +++ docs/src/dall_e/utils.py | 59 ++ docs/src/model_card.md | 41 + docs/src/notebooks/usage.py | 80 ++ docs/src/requirements.txt | 8 + docs/src/setup.py | 16 + notebooks/usage.py | 80 ++ 25 files changed, 2235 insertions(+) create mode 100644 .fdignore create mode 100644 docs/.gitignore create mode 100644 docs/codeview.html create mode 100644 docs/data/0.json create mode 100644 docs/doc/0c3fc06c-6184-4357-85d6-348275dd29c6.json create mode 100644 docs/doc/2702f9ef-853c-453e-a9d3-52a7f948e47d.json create mode 100644 docs/doc/6a64bc63-97f1-469f-8134-72ca11c14704.json create mode 100644 docs/doc/6e2219bf-c085-433e-9e45-71ef30ea0a5a.json create mode 100644 docs/doc/ba82746b-979a-400b-9ee9-a0c71b845a6b.json create mode 100644 docs/doc/bed0c727-b3e6-471e-9e70-c52fd603fa0d.json create mode 100644 docs/doc/c73a6545-b180-4570-aada-92761b8c15d5.json create mode 100644 docs/doc/d4fbdf63-7c60-4310-adf4-002231b4814d.json create mode 100644 docs/doc/ee2a8a01-bbe2-4f6a-959c-188fbbe67bbf.json create mode 100644 docs/index.html create mode 100644 docs/metadata.json create mode 100644 docs/src/README.md create mode 100644 docs/src/dall_e/__init__.py create mode 100644 docs/src/dall_e/decoder.py create mode 100644 docs/src/dall_e/encoder.py create mode 100644 docs/src/dall_e/utils.py create mode 100644 docs/src/model_card.md create mode 100644 docs/src/notebooks/usage.py create mode 100644 docs/src/requirements.txt create mode 100644 docs/src/setup.py create mode 100644 notebooks/usage.py diff --git a/.fdignore b/.fdignore new file mode 100644 index 0000000..0ddd9da --- /dev/null +++ b/.fdignore @@ -0,0 +1,5 @@ +docs +.* +*/.* +*.ipynb +LICENSE \ No newline at end of file diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..c6a61aa --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,4 @@ +!.gitignore +!* +!*/* +cache_db.json diff --git a/docs/codeview.html b/docs/codeview.html new file mode 100644 index 0000000..9e2cb49 --- /dev/null +++ b/docs/codeview.html @@ -0,0 +1,522 @@ + + + + + + + + + Code View + + + + + + + + + + + +
Code Preview
+ + + \ No newline at end of file diff --git a/docs/data/0.json b/docs/data/0.json new file mode 100644 index 0000000..5b2c136 --- /dev/null +++ b/docs/data/0.json @@ -0,0 +1,290 @@ +{ + "0": { + "file_id": 0, + "content": "/README.md", + "type": "filepath" + }, + "1": { + "file_id": 0, + "content": "This is the official PyTorch package for DALL-E's discrete VAE. Transformer for image generation from text not included. Install using pip install DALL-E.", + "type": "summary" + }, + "2": { + "file_id": 0, + "content": "# Overview\n[[Blog]](https://openai.com/blog/dall-e/) [[Paper]](https://arxiv.org/abs/2102.12092) [[Model Card]](model_card.md) [[Usage]](notebooks/usage.ipynb)\nThis is the official PyTorch package for the discrete VAE used for DALL·E. The transformer used to generate the images from the text is not part of this code release.\n# Installation\nBefore running [the example notebook](notebooks/usage.ipynb), you will need to install the package using\n\tpip install DALL-E", + "type": "code", + "location": "/README.md:1-11" + }, + "3": { + "file_id": 0, + "content": "This is the official PyTorch package for DALL-E's discrete VAE. Transformer for image generation from text not included. Install using pip install DALL-E.", + "type": "comment" + }, + "4": { + "file_id": 1, + "content": "/model_card.md", + "type": "filepath" + }, + "5": { + "file_id": 1, + "content": "DALL·E's dVAE model by OpenAI reduces memory footprint but is unsuitable for high-fidelity image processing and general-purpose image compression due to loss of fine details.", + "type": "summary" + }, + "6": { + "file_id": 1, + "content": "# Model Card: DALL·E dVAE\nFollowing [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993) and [Lessons from\nArchives (Jo & Gebru)](https://arxiv.org/pdf/1912.10389.pdf), we're providing some information about about the discrete\nVAE (dVAE) that was used to train DALL·E.\n## Model Details\nThe dVAE was developed by researchers at OpenAI to reduce the memory footprint of the transformer trained on the\ntext-to-image generation task. The details involved in training the dVAE are described in [the paper][dalle_paper]. This\nmodel card describes the first version of the model, released in February 2021. The model consists of a convolutional\nencoder and decoder whose architectures are described [here](dall_e/encoder.py) and [here](dall_e/decoder.py), respectively.\nFor questions or comments about the models or the code release, please file a Github issue.\n## Model Use\n### Intended Use\nThe model is intended for others to use for training their own generative models.\n### Out-of-Scope Use Cases", + "type": "code", + "location": "/model_card.md:1-21" + }, + "7": { + "file_id": 1, + "content": "This is the model card for DALL·E's discrete VAE (dVAE), which was developed by OpenAI to reduce transformer memory footprint.", + "type": "comment" + }, + "8": { + "file_id": 1, + "content": "This model is inappropriate for high-fidelity image processing applications. We also do not recommend its use as a\ngeneral-purpose image compressor.\n## Training Data\nThe model was trained on publicly available text-image pairs collected from the internet. This data consists partly of\n[Conceptual Captions][cc] and a filtered subset of [YFCC100M][yfcc100m]. We used a subset of the filters described in\n[Sharma et al.][cc_paper] to construct this dataset; further details are described in [our paper][dalle_paper]. 
We will\nnot be releasing the dataset.\n## Performance and Limitations\nThe heavy compression from the encoding process results in a noticeable loss of detail in the reconstructed images. This\nrenders it inappropriate for applications that require fine-grained details of the image to be preserved.\n[dalle_paper]: https://arxiv.org/abs/2102.12092\n[cc]: https://ai.google.com/research/ConceptualCaptions\n[cc_paper]: https://www.aclweb.org/anthology/P18-1238/\n[yfcc100m]: http://projects.dfki.uni-kl.de/yfcc100m/", + "type": "code", + "location": "/model_card.md:23-41" + }, + "9": { + "file_id": 1, + "content": "The model is not suitable for high-fidelity image processing or general-purpose image compression. It was trained on a mix of Conceptual Captions and filtered YFCC100M datasets using specific filters, details in the paper. The dataset will not be released. Compression leads to loss of fine image details, making it unsuitable for applications requiring preserved details.", + "type": "comment" + }, + "10": { + "file_id": 2, + "content": "/requirements.txt", + "type": "filepath" + }, + "11": { + "file_id": 2, + "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "type": "summary" + }, + "12": { + "file_id": 2, + "content": "Pillow\nblobfile\nmypy\nnumpy\npytest\nrequests\ntorch\ntorchvision", + "type": "code", + "location": "/requirements.txt:1-8" + }, + "13": { + "file_id": 2, + "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "type": "comment" + }, + "14": { + "file_id": 3, + "content": "/setup.py", + "type": "filepath" + }, + "15": { + "file_id": 3, + "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "type": "summary" + }, + "16": { + "file_id": 3, + "content": "from setuptools import setup\ndef parse_requirements(filename):\n\tlines = (line.strip() for line in open(filename))\n\treturn [line for line in lines if line and not line.startswith(\"#\")]\nsetup(name='DALL-E',\n version='0.1',\n description='PyTorch package for the discrete VAE used for DALL·E.',\n url='http://github.com/openai/DALL-E',\n author='Aditya Ramesh',\n author_email='aramesh@openai.com',\n license='BSD',\n packages=['dall_e'],\n install_requires=parse_requirements('requirements.txt'),\n zip_safe=True)", + "type": "code", + "location": "/setup.py:1-16" + }, + "17": { + "file_id": 3, + "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. 
It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "type": "comment" + }, + "18": { + "file_id": 4, + "content": "/dall_e/__init__.py", + "type": "filepath" + }, + "19": { + "file_id": 4, + "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "type": "summary" + }, + "20": { + "file_id": 4, + "content": "import io, requests\nimport torch\nimport torch.nn as nn\nfrom dall_e.encoder import Encoder\nfrom dall_e.decoder import Decoder\nfrom dall_e.utils import map_pixels, unmap_pixels\ndef load_model(path: str, device: torch.device = None) -> nn.Module:\n if path.startswith('http://') or path.startswith('https://'):\n resp = requests.get(path)\n resp.raise_for_status()\n with io.BytesIO(resp.content) as buf:\n return torch.load(buf, map_location=device)\n else:\n with open(path, 'rb') as f:\n return torch.load(f, map_location=device)", + "type": "code", + "location": "/dall_e/__init__.py:1-18" + }, + "21": { + "file_id": 4, + "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "type": "comment" + }, + "22": { + "file_id": 5, + "content": "/dall_e/decoder.py", + "type": "filepath" + }, + "23": { + "file_id": 5, + "content": "PyTorch models are described in both comments, with Comment A focusing on DALL-E's architecture involving multiple groups and partial functions for convolutional layers. In contrast, Comment B presents an encoder-decoder model using convolutional layers, residual connections, and ReLU activations.", + "type": "summary" + }, + "24": { + "file_id": 5, + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass DecoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 1)),\n\t\t\t\t('relu_2', nn.ReLU()),", + "type": "code", + "location": "/dall_e/decoder.py:1-31" + }, + "25": { + "file_id": 5, + "content": "This code defines a DecoderBlock class which is a neural network module. It takes input size (n_in), output size (n_out), number of layers (n_layers), device, and requires_grad as attributes. It initializes the hidden layer size (n_hid), post gain value, and makes convolution layers using partial function. The id_path is an identity path if n_in == n_out, otherwise it's a convolution layer. 
The res_path is a sequence of ReLU activations and convolution layers.", + "type": "comment" + }, + "26": { + "file_id": 5, + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 3)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Decoder(nn.Module):\n\tgroup_count: int = 4\n\tn_init: int = attr.ib(default=128, validator=lambda i, a, x: x >= 8)\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\toutput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:", + "type": "code", + "location": "/dall_e/decoder.py:32-54" + }, + "27": { + "file_id": 5, + "content": "This code defines a class called Decoder which is an instance of the nn.Module class in PyTorch. It has several attributes such as group_count, n_init, n_hid, n_blk_per_group, output_channels, vocab_size, device, requires_grad, and use_mixed_precision. The forward method is defined to compute the forward pass of the decoder network. It uses a combination of the id_path and res_path outputs, which are likely residual paths in the network. The make_conv function seems to be used to create convolutional layers with specified parameters.", + "type": "comment" + }, + "28": { + "file_id": 5, + "content": "\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)\n\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(DecoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.vocab_size, self.n_init, 1, use_float16=False)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(self.n_init if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(8 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],", + "type": "code", + "location": "/dall_e/decoder.py:55-74" + }, + "29": { + "file_id": 5, + "content": "This code initializes a neural network for the DALL-E model, consisting of multiple groups with progressively smaller block sizes. It uses partial functions to create convolutional layers and blocks. 
The input is fed through a series of upsampling and convolution operations in each group before being processed by the final output layer.", + "type": "comment" + }, + "30": { + "file_id": 5, + "content": "\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(1 * self.n_hid, 2 * self.output_channels, 1)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.vocab_size:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.vocab_size}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", + "type": "code", + "location": "/dall_e/decoder.py:75-94" + }, + "31": { + "file_id": 5, + "content": "This code defines a class for an encoder-decoder model in PyTorch. The forward function takes in an input tensor and passes it through multiple blocks before returning the output tensor. The model consists of convolutional layers, residual connections, and ReLU activations to process input data.", + "type": "comment" + }, + "32": { + "file_id": 6, + "content": "/dall_e/encoder.py", + "type": "filepath" + }, + "33": { + "file_id": 6, + "content": "The code defines two classes, \"EncoderBlock\" and \"Encoder\", for neural network modules with 4 groups of 2 blocks each, using residual paths. It encodes input, performs max pooling, checks errors, and has various attributes for computation.", + "type": "summary" + }, + "34": { + "file_id": 6, + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass EncoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 3)),\n\t\t\t\t('relu_2', nn.ReLU()),", + "type": "code", + "location": "/dall_e/encoder.py:1-31" + }, + "35": { + "file_id": 6, + "content": "This code defines a class named \"EncoderBlock\" which is a module for an encoder block in the neural network. It takes input parameters such as the number of input features (n_in), output features (n_out), and layers (n_layers). The module also has properties like device, requires_grad, and initializes instance variables n_hid, post_gain. 
It uses a partial function to make a convolution layer and creates an identity path and residual path for the encoder block.", + "type": "comment" + }, + "36": { + "file_id": 6, + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 1)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Encoder(nn.Module):\n\tgroup_count: int = 4\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\tinput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)", + "type": "code", + "location": "/dall_e/encoder.py:32-56" + }, + "37": { + "file_id": 6, + "content": "This code defines a class called \"Encoder\" which is a type of neural network module. It has 4 groups, each group containing 2 blocks of convolutional layers and activation functions. The input is passed through the identity path and the residual path, then their sum is returned as output. The Encoder class also has several attributes such as number of hidden units, number of block per group, input channels, vocabulary size, device to run on, requires gradient computation, and use mixed precision.", + "type": "comment" + }, + "38": { + "file_id": 6, + "content": "\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(EncoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.input_channels, 1 * self.n_hid, 7)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t", + "type": "code", + "location": "/dall_e/encoder.py:57-77" + }, + "39": { + "file_id": 6, + "content": "This code is creating a neural network encoder with multiple blocks. It consists of four groups, each with different number of layers and hidden size. Each group has a series of EncoderBlocks followed by a max pooling operation. 
The input channel size is defined based on the current block and group configuration.", + "type": "comment" + }, + "40": { + "file_id": 6, + "content": "*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(8 * self.n_hid, self.vocab_size, 1, use_float16=False)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.input_channels:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.input_channels}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", + "type": "code", + "location": "/dall_e/encoder.py:77-93" + }, + "41": { + "file_id": 6, + "content": "The code defines a neural network module that takes an input tensor and passes it through multiple blocks of convolutional layers. The output is then processed by another set of convolutional layers before returning the final result. The function also includes error checks for the shape, number of channels, and data type of the input tensor to ensure proper functioning.", + "type": "comment" + }, + "42": { + "file_id": 7, + "content": "/dall_e/utils.py", + "type": "filepath" + }, + "43": { + "file_id": 7, + "content": "The code defines a Conv2d class with attributes and initializes weights and biases. It also includes three functions, `map_pixels`, `unmap_pixels`, and `conv2d`, for scaling, convolution operation, and padding based on kernel width.", + "type": "summary" + }, + "44": { + "file_id": 7, + "content": "import attr\nimport math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nlogit_laplace_eps: float = 0.1\n@attr.s(eq=False)\nclass Conv2d(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tkw: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 2 == 1)\n\tuse_float16: bool = attr.ib(default=True)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tw = torch.empty((self.n_out, self.n_in, self.kw, self.kw), dtype=torch.float32,\n\t\t\tdevice=self.device, requires_grad=self.requires_grad)\n\t\tw.normal_(std=1 / math.sqrt(self.n_in * self.kw ** 2))\n\t\tb = torch.zeros((self.n_out,), dtype=torch.float32, device=self.device,\n\t\t\trequires_grad=self.requires_grad)\n\t\tself.w, self.b = nn.Parameter(w), nn.Parameter(b)\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif self.use_float16 and 'cuda' in self.w.device.type:", + "type": "code", + "location": "/dall_e/utils.py:1-32" + }, + "45": { + "file_id": 7, + "content": "This code defines a Conv2d class that extends torch.nn.Module and implements a 2D convolutional layer using the nn.Conv2d module from PyTorch. The class has several attributes including number of input channels (n_in), number of output channels (n_out), kernel width (kw), use_float16 for whether to use float16 or float32, device for tensor storage location, and requires_grad for whether the parameters should be tracked during backpropagation. The class initializes the weight matrix (self.w) with normal distribution and bias (self.b) as zeros. 
It also has a forward method that applies the convolution operation on the input tensor (x).", + "type": "comment" + }, + "46": { + "file_id": 7, + "content": "\t\t\tif x.dtype != torch.float16:\n\t\t\t\tx = x.half()\n\t\t\tw, b = self.w.half(), self.b.half()\n\t\telse:\n\t\t\tif x.dtype != torch.float32:\n\t\t\t\tx = x.float()\n\t\t\tw, b = self.w, self.b\n\t\treturn F.conv2d(x, w, b, padding=(self.kw - 1) // 2)\ndef map_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps\ndef unmap_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn torch.clamp((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1)", + "type": "code", + "location": "/dall_e/utils.py:33-59" + }, + "47": { + "file_id": 7, + "content": "The code defines three functions: `map_pixels`, `unmap_pixels`, and `conv2d`. The `map_pixels` function scales the input tensor by a factor and adds a constant to it. It also checks if the input tensor is 4-dimensional and has the correct data type (float). Similarly, the `unmap_pixels` function scales and shifts the input tensor, and ensures the correct dimensions and data type. The `conv2d` function applies a convolution operation on the input tensor with specified weights and biases, and handles the padding based on the kernel width.", + "type": "comment" + }, + "48": { + "file_id": 8, + "content": "/notebooks/usage.py", + "type": "filepath" + }, + "49": { + "file_id": 8, + "content": "The code imports libraries, defines functions for image downloading and preprocessing, sets the target size, loads DALL-E models, processes an image, reconstructs it using the models, and displays both images.", + "type": "summary" + }, + "50": { + "file_id": 8, + "content": "#!/usr/bin/env python\n# coding: utf-8\n# In[ ]:\nimport io\nimport os, sys\nimport requests\nimport PIL\nimport torch\nimport torchvision.transforms as T\nimport torchvision.transforms.functional as TF\nfrom dall_e import map_pixels, unmap_pixels, load_model\nfrom IPython.display import display, display_markdown\ntarget_image_size = 256\ndef download_image(url):\n resp = requests.get(url)\n resp.raise_for_status()\n return PIL.Image.open(io.BytesIO(resp.content))\ndef preprocess(img):\n s = min(img.size)\n if s < target_image_size:\n raise ValueError(f'min dim for image {s} < {target_image_size}')\n r = target_image_size / s\n s = (round(r * img.size[1]), round(r * img.size[0]))\n img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n img = TF.center_crop(img, output_size=2 * [target_image_size])\n img = torch.unsqueeze(T.ToTensor()(img), 0)\n return map_pixels(img)\n# In[ ]:\n# This can be changed to a GPU, e.g. 
'cuda:0'.\ndev = torch.device('cpu')\n# For faster load times, download these files locally and use the local paths instead.", + "type": "code", + "location": "/notebooks/usage.py:1-46" + }, + "51": { + "file_id": 8, + "content": "This code imports necessary libraries, defines functions for downloading and preprocessing images, sets the target image size, and specifies the device (CPU) to be used.", + "type": "comment" + }, + "52": { + "file_id": 8, + "content": "enc = load_model(\"https://cdn.openai.com/dall-e/encoder.pkl\", dev)\ndec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", dev)\n# In[ ]:\nx = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))\ndisplay_markdown('Original image:')\ndisplay(T.ToPILImage(mode='RGB')(x[0]))\n# In[ ]:\nimport torch.nn.functional as F\nz_logits = enc(x)\nz = torch.argmax(z_logits, axis=1)\nz = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()\nx_stats = dec(z).float()\nx_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))\nx_rec = T.ToPILImage(mode='RGB')(x_rec[0])\ndisplay_markdown('Reconstructed image:')\ndisplay(x_rec)\n# In[ ]:", + "type": "code", + "location": "/notebooks/usage.py:47-76" + }, + "53": { + "file_id": 8, + "content": "This code loads the DALL-E encoder and decoder models, preprocesses an image, reconstructs it using the models, and displays both the original and reconstructed images.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/doc/0c3fc06c-6184-4357-85d6-348275dd29c6.json b/docs/doc/0c3fc06c-6184-4357-85d6-348275dd29c6.json new file mode 100644 index 0000000..52f81fb --- /dev/null +++ b/docs/doc/0c3fc06c-6184-4357-85d6-348275dd29c6.json @@ -0,0 +1,25 @@ +{ + "summary": "The code defines two classes, \"EncoderBlock\" and \"Encoder\", for neural network modules with 4 groups of 2 blocks each, using residual paths. It encodes input, performs max pooling, checks errors, and has various attributes for computation.", + "details": [ + { + "comment": "This code defines a class named \"EncoderBlock\" which is a module for an encoder block in the neural network. It takes input parameters such as the number of input features (n_in), output features (n_out), and layers (n_layers). The module also has properties like device, requires_grad, and initializes instance variables n_hid, post_gain. 
It uses a partial function to make a convolution layer and creates an identity path and residual path for the encoder block.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/encoder.py\":0-30", + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass EncoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 3)),\n\t\t\t\t('relu_2', nn.ReLU())," + }, + { + "comment": "This code defines a class called \"Encoder\" which is a type of neural network module. It has 4 groups, each group containing 2 blocks of convolutional layers and activation functions. The input is passed through the identity path and the residual path, then their sum is returned as output. The Encoder class also has several attributes such as number of hidden units, number of block per group, input channels, vocabulary size, device to run on, requires gradient computation, and use mixed precision.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/encoder.py\":31-55", + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 1)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Encoder(nn.Module):\n\tgroup_count: int = 4\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\tinput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)" + }, + { + "comment": "This code is creating a neural network encoder with multiple blocks. It consists of four groups, each with different number of layers and hidden size. Each group has a series of EncoderBlocks followed by a max pooling operation. 
The input channel size is defined based on the current block and group configuration.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/encoder.py\":56-76", + "content": "\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(EncoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.input_channels, 1 * self.n_hid, 7)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t" + }, + { + "comment": "The code defines a neural network module that takes an input tensor and passes it through multiple blocks of convolutional layers. The output is then processed by another set of convolutional layers before returning the final result. The function also includes error checks for the shape, number of channels, and data type of the input tensor to ensure proper functioning.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/encoder.py\":76-92", + "content": "*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(8 * self.n_hid, self.vocab_size, 1, use_float16=False)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.input_channels:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.input_channels}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/2702f9ef-853c-453e-a9d3-52a7f948e47d.json b/docs/doc/2702f9ef-853c-453e-a9d3-52a7f948e47d.json new file mode 100644 index 0000000..de92065 --- /dev/null +++ b/docs/doc/2702f9ef-853c-453e-a9d3-52a7f948e47d.json @@ -0,0 +1,10 @@ +{ + "summary": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "details": [ + { + "comment": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/requirements.txt\":0-7", + "content": "Pillow\nblobfile\nmypy\nnumpy\npytest\nrequests\ntorch\ntorchvision" + } + ] +} \ No newline at end of file diff --git a/docs/doc/6a64bc63-97f1-469f-8134-72ca11c14704.json b/docs/doc/6a64bc63-97f1-469f-8134-72ca11c14704.json new file mode 100644 index 0000000..ebd149e --- /dev/null +++ b/docs/doc/6a64bc63-97f1-469f-8134-72ca11c14704.json @@ -0,0 +1,10 @@ +{ + "summary": "This is the 
official PyTorch package for DALL-E's discrete VAE. Transformer for image generation from text not included. Install using pip install DALL-E.", + "details": [ + { + "comment": "This is the official PyTorch package for DALL-E's discrete VAE. Transformer for image generation from text not included. Install using pip install DALL-E.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/README.md\":0-10", + "content": "# Overview\n[[Blog]](https://openai.com/blog/dall-e/) [[Paper]](https://arxiv.org/abs/2102.12092) [[Model Card]](model_card.md) [[Usage]](notebooks/usage.ipynb)\nThis is the official PyTorch package for the discrete VAE used for DALL\u00b7E. The transformer used to generate the images from the text is not part of this code release.\n# Installation\nBefore running [the example notebook](notebooks/usage.ipynb), you will need to install the package using\n\tpip install DALL-E" + } + ] +} \ No newline at end of file diff --git a/docs/doc/6e2219bf-c085-433e-9e45-71ef30ea0a5a.json b/docs/doc/6e2219bf-c085-433e-9e45-71ef30ea0a5a.json new file mode 100644 index 0000000..e8b111d --- /dev/null +++ b/docs/doc/6e2219bf-c085-433e-9e45-71ef30ea0a5a.json @@ -0,0 +1,10 @@ +{ + "summary": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "details": [ + { + "comment": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/__init__.py\":0-17", + "content": "import io, requests\nimport torch\nimport torch.nn as nn\nfrom dall_e.encoder import Encoder\nfrom dall_e.decoder import Decoder\nfrom dall_e.utils import map_pixels, unmap_pixels\ndef load_model(path: str, device: torch.device = None) -> nn.Module:\n if path.startswith('http://') or path.startswith('https://'):\n resp = requests.get(path)\n resp.raise_for_status()\n with io.BytesIO(resp.content) as buf:\n return torch.load(buf, map_location=device)\n else:\n with open(path, 'rb') as f:\n return torch.load(f, map_location=device)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/ba82746b-979a-400b-9ee9-a0c71b845a6b.json b/docs/doc/ba82746b-979a-400b-9ee9-a0c71b845a6b.json new file mode 100644 index 0000000..924b92b --- /dev/null +++ b/docs/doc/ba82746b-979a-400b-9ee9-a0c71b845a6b.json @@ -0,0 +1,25 @@ +{ + "summary": "PyTorch models are described in both comments, with Comment A focusing on DALL-E's architecture involving multiple groups and partial functions for convolutional layers. In contrast, Comment B presents an encoder-decoder model using convolutional layers, residual connections, and ReLU activations.", + "details": [ + { + "comment": "This code defines a DecoderBlock class which is a neural network module. It takes input size (n_in), output size (n_out), number of layers (n_layers), device, and requires_grad as attributes. It initializes the hidden layer size (n_hid), post gain value, and makes convolution layers using partial function. The id_path is an identity path if n_in == n_out, otherwise it's a convolution layer. 
The res_path is a sequence of ReLU activations and convolution layers.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/decoder.py\":0-30", + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass DecoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 1)),\n\t\t\t\t('relu_2', nn.ReLU())," + }, + { + "comment": "This code defines a class called Decoder which is an instance of the nn.Module class in PyTorch. It has several attributes such as group_count, n_init, n_hid, n_blk_per_group, output_channels, vocab_size, device, requires_grad, and use_mixed_precision. The forward method is defined to compute the forward pass of the decoder network. It uses a combination of the id_path and res_path outputs, which are likely residual paths in the network. The make_conv function seems to be used to create convolutional layers with specified parameters.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/decoder.py\":31-53", + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 3)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Decoder(nn.Module):\n\tgroup_count: int = 4\n\tn_init: int = attr.ib(default=128, validator=lambda i, a, x: x >= 8)\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\toutput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:" + }, + { + "comment": "This code initializes a neural network for the DALL-E model, consisting of multiple groups with progressively smaller block sizes. It uses partial functions to create convolutional layers and blocks. 
The input is fed through a series of upsampling and convolution operations in each group before being processed by the final output layer.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/decoder.py\":54-73", + "content": "\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)\n\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(DecoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.vocab_size, self.n_init, 1, use_float16=False)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(self.n_init if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(8 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range]," + }, + { + "comment": "This code defines a class for an encoder-decoder model in PyTorch. The forward function takes in an input tensor and passes it through multiple blocks before returning the output tensor. The model consists of convolutional layers, residual connections, and ReLU activations to process input data.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/decoder.py\":74-93", + "content": "\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(1 * self.n_hid, 2 * self.output_channels, 1)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.vocab_size:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.vocab_size}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/bed0c727-b3e6-471e-9e70-c52fd603fa0d.json b/docs/doc/bed0c727-b3e6-471e-9e70-c52fd603fa0d.json new file mode 100644 index 0000000..777da80 --- /dev/null +++ b/docs/doc/bed0c727-b3e6-471e-9e70-c52fd603fa0d.json @@ -0,0 +1,15 @@ +{ + "summary": "DALL\u00b7E's dVAE model by OpenAI reduces memory footprint but is unsuitable for high-fidelity image processing and general-purpose image compression due to loss of fine details.", + "details": [ + { + "comment": "This is the model card for DALL\u00b7E's discrete VAE (dVAE), which was developed by OpenAI to reduce transformer memory footprint.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/model_card.md\":0-20", + "content": "# Model Card: DALL\u00b7E dVAE\nFollowing [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993) and [Lessons from\nArchives 
(Jo & Gebru)](https://arxiv.org/pdf/1912.10389.pdf), we're providing some information about about the discrete\nVAE (dVAE) that was used to train DALL\u00b7E.\n## Model Details\nThe dVAE was developed by researchers at OpenAI to reduce the memory footprint of the transformer trained on the\ntext-to-image generation task. The details involved in training the dVAE are described in [the paper][dalle_paper]. This\nmodel card describes the first version of the model, released in February 2021. The model consists of a convolutional\nencoder and decoder whose architectures are described [here](dall_e/encoder.py) and [here](dall_e/decoder.py), respectively.\nFor questions or comments about the models or the code release, please file a Github issue.\n## Model Use\n### Intended Use\nThe model is intended for others to use for training their own generative models.\n### Out-of-Scope Use Cases" + }, + { + "comment": "The model is not suitable for high-fidelity image processing or general-purpose image compression. It was trained on a mix of Conceptual Captions and filtered YFCC100M datasets using specific filters, details in the paper. The dataset will not be released. Compression leads to loss of fine image details, making it unsuitable for applications requiring preserved details.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/model_card.md\":22-40", + "content": "This model is inappropriate for high-fidelity image processing applications. We also do not recommend its use as a\ngeneral-purpose image compressor.\n## Training Data\nThe model was trained on publicly available text-image pairs collected from the internet. This data consists partly of\n[Conceptual Captions][cc] and a filtered subset of [YFCC100M][yfcc100m]. We used a subset of the filters described in\n[Sharma et al.][cc_paper] to construct this dataset; further details are described in [our paper][dalle_paper]. We will\nnot be releasing the dataset.\n## Performance and Limitations\nThe heavy compression from the encoding process results in a noticeable loss of detail in the reconstructed images. This\nrenders it inappropriate for applications that require fine-grained details of the image to be preserved.\n[dalle_paper]: https://arxiv.org/abs/2102.12092\n[cc]: https://ai.google.com/research/ConceptualCaptions\n[cc_paper]: https://www.aclweb.org/anthology/P18-1238/\n[yfcc100m]: http://projects.dfki.uni-kl.de/yfcc100m/" + } + ] +} \ No newline at end of file diff --git a/docs/doc/c73a6545-b180-4570-aada-92761b8c15d5.json b/docs/doc/c73a6545-b180-4570-aada-92761b8c15d5.json new file mode 100644 index 0000000..6a3d8c9 --- /dev/null +++ b/docs/doc/c73a6545-b180-4570-aada-92761b8c15d5.json @@ -0,0 +1,15 @@ +{ + "summary": "The code defines a Conv2d class with attributes and initializes weights and biases. It also includes three functions, `map_pixels`, `unmap_pixels`, and `conv2d`, for scaling, convolution operation, and padding based on kernel width.", + "details": [ + { + "comment": "This code defines a Conv2d class that extends torch.nn.Module and implements a 2D convolutional layer using the nn.Conv2d module from PyTorch. The class has several attributes including number of input channels (n_in), number of output channels (n_out), kernel width (kw), use_float16 for whether to use float16 or float32, device for tensor storage location, and requires_grad for whether the parameters should be tracked during backpropagation. The class initializes the weight matrix (self.w) with normal distribution and bias (self.b) as zeros. 
It also has a forward method that applies the convolution operation on the input tensor (x).", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/utils.py\":0-31", + "content": "import attr\nimport math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nlogit_laplace_eps: float = 0.1\n@attr.s(eq=False)\nclass Conv2d(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tkw: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 2 == 1)\n\tuse_float16: bool = attr.ib(default=True)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tw = torch.empty((self.n_out, self.n_in, self.kw, self.kw), dtype=torch.float32,\n\t\t\tdevice=self.device, requires_grad=self.requires_grad)\n\t\tw.normal_(std=1 / math.sqrt(self.n_in * self.kw ** 2))\n\t\tb = torch.zeros((self.n_out,), dtype=torch.float32, device=self.device,\n\t\t\trequires_grad=self.requires_grad)\n\t\tself.w, self.b = nn.Parameter(w), nn.Parameter(b)\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif self.use_float16 and 'cuda' in self.w.device.type:" + }, + { + "comment": "The code defines three functions: `map_pixels`, `unmap_pixels`, and `conv2d`. The `map_pixels` function scales the input tensor by a factor and adds a constant to it. It also checks if the input tensor is 4-dimensional and has the correct data type (float). Similarly, the `unmap_pixels` function scales and shifts the input tensor, and ensures the correct dimensions and data type. The `conv2d` function applies a convolution operation on the input tensor with specified weights and biases, and handles the padding based on the kernel width.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/dall_e/utils.py\":32-58", + "content": "\t\t\tif x.dtype != torch.float16:\n\t\t\t\tx = x.half()\n\t\t\tw, b = self.w.half(), self.b.half()\n\t\telse:\n\t\t\tif x.dtype != torch.float32:\n\t\t\t\tx = x.float()\n\t\t\tw, b = self.w, self.b\n\t\treturn F.conv2d(x, w, b, padding=(self.kw - 1) // 2)\ndef map_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps\ndef unmap_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn torch.clamp((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/d4fbdf63-7c60-4310-adf4-002231b4814d.json b/docs/doc/d4fbdf63-7c60-4310-adf4-002231b4814d.json new file mode 100644 index 0000000..2182275 --- /dev/null +++ b/docs/doc/d4fbdf63-7c60-4310-adf4-002231b4814d.json @@ -0,0 +1,10 @@ +{ + "summary": "This code sets up a Python package named \"DALL-E\" using the setuptools module. It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "details": [ + { + "comment": "This code sets up a Python package named \"DALL-E\" using the setuptools module. 
It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/setup.py\":0-15", + "content": "from setuptools import setup\ndef parse_requirements(filename):\n\tlines = (line.strip() for line in open(filename))\n\treturn [line for line in lines if line and not line.startswith(\"#\")]\nsetup(name='DALL-E',\n version='0.1',\n description='PyTorch package for the discrete VAE used for DALL\u00b7E.',\n url='http://github.com/openai/DALL-E',\n author='Aditya Ramesh',\n author_email='aramesh@openai.com',\n license='BSD',\n packages=['dall_e'],\n install_requires=parse_requirements('requirements.txt'),\n zip_safe=True)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/ee2a8a01-bbe2-4f6a-959c-188fbbe67bbf.json b/docs/doc/ee2a8a01-bbe2-4f6a-959c-188fbbe67bbf.json new file mode 100644 index 0000000..209dc35 --- /dev/null +++ b/docs/doc/ee2a8a01-bbe2-4f6a-959c-188fbbe67bbf.json @@ -0,0 +1,15 @@ +{ + "summary": "The code imports libraries, defines functions for image downloading and preprocessing, sets the target size, loads DALL-E models, processes an image, reconstructs it using the models, and displays both images.", + "details": [ + { + "comment": "This code imports necessary libraries, defines functions for downloading and preprocessing images, sets the target image size, and specifies the device (CPU) to be used.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/notebooks/usage.py\":0-45", + "content": "#!/usr/bin/env python\n# coding: utf-8\n# In[ ]:\nimport io\nimport os, sys\nimport requests\nimport PIL\nimport torch\nimport torchvision.transforms as T\nimport torchvision.transforms.functional as TF\nfrom dall_e import map_pixels, unmap_pixels, load_model\nfrom IPython.display import display, display_markdown\ntarget_image_size = 256\ndef download_image(url):\n resp = requests.get(url)\n resp.raise_for_status()\n return PIL.Image.open(io.BytesIO(resp.content))\ndef preprocess(img):\n s = min(img.size)\n if s < target_image_size:\n raise ValueError(f'min dim for image {s} < {target_image_size}')\n r = target_image_size / s\n s = (round(r * img.size[1]), round(r * img.size[0]))\n img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n img = TF.center_crop(img, output_size=2 * [target_image_size])\n img = torch.unsqueeze(T.ToTensor()(img), 0)\n return map_pixels(img)\n# In[ ]:\n# This can be changed to a GPU, e.g. 'cuda:0'.\ndev = torch.device('cpu')\n# For faster load times, download these files locally and use the local paths instead." 
+ }, + { + "comment": "This code loads the DALL-E encoder and decoder models, preprocesses an image, reconstructs it using the models, and displays both the original and reconstructed images.", + "location": "\"/media/root/Toshiba XG3/works/DALL-E/docs/src/notebooks/usage.py\":46-75", + "content": "enc = load_model(\"https://cdn.openai.com/dall-e/encoder.pkl\", dev)\ndec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", dev)\n# In[ ]:\nx = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))\ndisplay_markdown('Original image:')\ndisplay(T.ToPILImage(mode='RGB')(x[0]))\n# In[ ]:\nimport torch.nn.functional as F\nz_logits = enc(x)\nz = torch.argmax(z_logits, axis=1)\nz = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()\nx_stats = dec(z).float()\nx_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))\nx_rec = T.ToPILImage(mode='RGB')(x_rec[0])\ndisplay_markdown('Reconstructed image:')\ndisplay(x_rec)\n# In[ ]:" + } + ] +} \ No newline at end of file diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..3b050e0 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,724 @@ + + + + + + + + + Search Code By Comment + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + +
+

Document Index of: +

+ + + + + +
+ + + + + + \ No newline at end of file diff --git a/docs/metadata.json b/docs/metadata.json new file mode 100644 index 0000000..a1d0768 --- /dev/null +++ b/docs/metadata.json @@ -0,0 +1,55 @@ +{ + "url": { + "full": "https://github.com/james4ever0/DALL-E", + "partial": "james4ever0/DALL-E" + }, + "file_mapping": { + "0": { + "filepath": "/README.md", + "entry_id": 0, + "language_id": "plain-text" + }, + "1": { + "filepath": "/model_card.md", + "entry_id": 4, + "language_id": "markdown" + }, + "2": { + "filepath": "/requirements.txt", + "entry_id": 10, + "language_id": "plain-text" + }, + "3": { + "filepath": "/setup.py", + "entry_id": 14, + "language_id": "python" + }, + "4": { + "filepath": "/dall_e/__init__.py", + "entry_id": 18, + "language_id": "python" + }, + "5": { + "filepath": "/dall_e/decoder.py", + "entry_id": 22, + "language_id": "python" + }, + "6": { + "filepath": "/dall_e/encoder.py", + "entry_id": 32, + "language_id": "python" + }, + "7": { + "filepath": "/dall_e/utils.py", + "entry_id": 42, + "language_id": "python" + }, + "8": { + "filepath": "/notebooks/usage.py", + "entry_id": 48, + "language_id": "python" + } + }, + "project_name": "DALL-E", + "split_count": 1 +} \ No newline at end of file diff --git a/docs/src/README.md b/docs/src/README.md new file mode 100644 index 0000000..006a7ed --- /dev/null +++ b/docs/src/README.md @@ -0,0 +1,11 @@ +# Overview + +[[Blog]](https://openai.com/blog/dall-e/) [[Paper]](https://arxiv.org/abs/2102.12092) [[Model Card]](model_card.md) [[Usage]](notebooks/usage.ipynb) + +This is the official PyTorch package for the discrete VAE used for DALL·E. The transformer used to generate the images from the text is not part of this code release. + +# Installation + +Before running [the example notebook](notebooks/usage.ipynb), you will need to install the package using + + pip install DALL-E diff --git a/docs/src/dall_e/__init__.py b/docs/src/dall_e/__init__.py new file mode 100644 index 0000000..cd982fa --- /dev/null +++ b/docs/src/dall_e/__init__.py @@ -0,0 +1,18 @@ +import io, requests +import torch +import torch.nn as nn + +from dall_e.encoder import Encoder +from dall_e.decoder import Decoder +from dall_e.utils import map_pixels, unmap_pixels + +def load_model(path: str, device: torch.device = None) -> nn.Module: + if path.startswith('http://') or path.startswith('https://'): + resp = requests.get(path) + resp.raise_for_status() + + with io.BytesIO(resp.content) as buf: + return torch.load(buf, map_location=device) + else: + with open(path, 'rb') as f: + return torch.load(f, map_location=device) diff --git a/docs/src/dall_e/decoder.py b/docs/src/dall_e/decoder.py new file mode 100644 index 0000000..640625e --- /dev/null +++ b/docs/src/dall_e/decoder.py @@ -0,0 +1,94 @@ +import attr +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from collections import OrderedDict +from functools import partial +from dall_e.utils import Conv2d + +@attr.s(eq=False, repr=False) +class DecoderBlock(nn.Module): + n_in: int = attr.ib(validator=lambda i, a, x: x >= 1) + n_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0) + n_layers: int = attr.ib(validator=lambda i, a, x: x >= 1) + + device: torch.device = attr.ib(default=None) + requires_grad: bool = attr.ib(default=False) + + def __attrs_post_init__(self) -> None: + super().__init__() + self.n_hid = self.n_out // 4 + self.post_gain = 1 / (self.n_layers ** 2) + + make_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad) + 
self.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity() + self.res_path = nn.Sequential(OrderedDict([ + ('relu_1', nn.ReLU()), + ('conv_1', make_conv(self.n_in, self.n_hid, 1)), + ('relu_2', nn.ReLU()), + ('conv_2', make_conv(self.n_hid, self.n_hid, 3)), + ('relu_3', nn.ReLU()), + ('conv_3', make_conv(self.n_hid, self.n_hid, 3)), + ('relu_4', nn.ReLU()), + ('conv_4', make_conv(self.n_hid, self.n_out, 3)),])) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.id_path(x) + self.post_gain * self.res_path(x) + +@attr.s(eq=False, repr=False) +class Decoder(nn.Module): + group_count: int = 4 + n_init: int = attr.ib(default=128, validator=lambda i, a, x: x >= 8) + n_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64) + n_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1) + output_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1) + vocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512) + + device: torch.device = attr.ib(default=torch.device('cpu')) + requires_grad: bool = attr.ib(default=False) + use_mixed_precision: bool = attr.ib(default=True) + + def __attrs_post_init__(self) -> None: + super().__init__() + + blk_range = range(self.n_blk_per_group) + n_layers = self.group_count * self.n_blk_per_group + make_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad) + make_blk = partial(DecoderBlock, n_layers=n_layers, device=self.device, + requires_grad=self.requires_grad) + + self.blocks = nn.Sequential(OrderedDict([ + ('input', make_conv(self.vocab_size, self.n_init, 1, use_float16=False)), + ('group_1', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(self.n_init if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range], + ('upsample', nn.Upsample(scale_factor=2, mode='nearest')), + ]))), + ('group_2', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(8 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range], + ('upsample', nn.Upsample(scale_factor=2, mode='nearest')), + ]))), + ('group_3', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range], + ('upsample', nn.Upsample(scale_factor=2, mode='nearest')), + ]))), + ('group_4', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 1 * self.n_hid, 1 * self.n_hid)) for i in blk_range], + ]))), + ('output', nn.Sequential(OrderedDict([ + ('relu', nn.ReLU()), + ('conv', make_conv(1 * self.n_hid, 2 * self.output_channels, 1)), + ]))), + ])) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if len(x.shape) != 4: + raise ValueError(f'input shape {x.shape} is not 4d') + if x.shape[1] != self.vocab_size: + raise ValueError(f'input has {x.shape[1]} channels but model built for {self.vocab_size}') + if x.dtype != torch.float32: + raise ValueError('input must have dtype torch.float32') + + return self.blocks(x) diff --git a/docs/src/dall_e/encoder.py b/docs/src/dall_e/encoder.py new file mode 100644 index 0000000..712f2f5 --- /dev/null +++ b/docs/src/dall_e/encoder.py @@ -0,0 +1,93 @@ +import attr +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from collections import OrderedDict +from functools import partial +from dall_e.utils import Conv2d + +@attr.s(eq=False, repr=False) +class EncoderBlock(nn.Module): + n_in: int = attr.ib(validator=lambda i, a, x: x >= 1) + n_out: 
int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0) + n_layers: int = attr.ib(validator=lambda i, a, x: x >= 1) + + device: torch.device = attr.ib(default=None) + requires_grad: bool = attr.ib(default=False) + + def __attrs_post_init__(self) -> None: + super().__init__() + self.n_hid = self.n_out // 4 + self.post_gain = 1 / (self.n_layers ** 2) + + make_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad) + self.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity() + self.res_path = nn.Sequential(OrderedDict([ + ('relu_1', nn.ReLU()), + ('conv_1', make_conv(self.n_in, self.n_hid, 3)), + ('relu_2', nn.ReLU()), + ('conv_2', make_conv(self.n_hid, self.n_hid, 3)), + ('relu_3', nn.ReLU()), + ('conv_3', make_conv(self.n_hid, self.n_hid, 3)), + ('relu_4', nn.ReLU()), + ('conv_4', make_conv(self.n_hid, self.n_out, 1)),])) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.id_path(x) + self.post_gain * self.res_path(x) + +@attr.s(eq=False, repr=False) +class Encoder(nn.Module): + group_count: int = 4 + n_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64) + n_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1) + input_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1) + vocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512) + + device: torch.device = attr.ib(default=torch.device('cpu')) + requires_grad: bool = attr.ib(default=False) + use_mixed_precision: bool = attr.ib(default=True) + + def __attrs_post_init__(self) -> None: + super().__init__() + + blk_range = range(self.n_blk_per_group) + n_layers = self.group_count * self.n_blk_per_group + make_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad) + make_blk = partial(EncoderBlock, n_layers=n_layers, device=self.device, + requires_grad=self.requires_grad) + + self.blocks = nn.Sequential(OrderedDict([ + ('input', make_conv(self.input_channels, 1 * self.n_hid, 7)), + ('group_1', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(1 * self.n_hid, 1 * self.n_hid)) for i in blk_range], + ('pool', nn.MaxPool2d(kernel_size=2)), + ]))), + ('group_2', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(1 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range], + ('pool', nn.MaxPool2d(kernel_size=2)), + ]))), + ('group_3', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range], + ('pool', nn.MaxPool2d(kernel_size=2)), + ]))), + ('group_4', nn.Sequential(OrderedDict([ + *[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range], + ]))), + ('output', nn.Sequential(OrderedDict([ + ('relu', nn.ReLU()), + ('conv', make_conv(8 * self.n_hid, self.vocab_size, 1, use_float16=False)), + ]))), + ])) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if len(x.shape) != 4: + raise ValueError(f'input shape {x.shape} is not 4d') + if x.shape[1] != self.input_channels: + raise ValueError(f'input has {x.shape[1]} channels but model built for {self.input_channels}') + if x.dtype != torch.float32: + raise ValueError('input must have dtype torch.float32') + + return self.blocks(x) diff --git a/docs/src/dall_e/utils.py b/docs/src/dall_e/utils.py new file mode 100644 index 0000000..cdb1cad --- /dev/null +++ b/docs/src/dall_e/utils.py @@ -0,0 +1,59 @@ +import attr +import math + +import torch +import 
torch.nn as nn +import torch.nn.functional as F + +logit_laplace_eps: float = 0.1 + +@attr.s(eq=False) +class Conv2d(nn.Module): + n_in: int = attr.ib(validator=lambda i, a, x: x >= 1) + n_out: int = attr.ib(validator=lambda i, a, x: x >= 1) + kw: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 2 == 1) + + use_float16: bool = attr.ib(default=True) + device: torch.device = attr.ib(default=torch.device('cpu')) + requires_grad: bool = attr.ib(default=False) + + def __attrs_post_init__(self) -> None: + super().__init__() + + w = torch.empty((self.n_out, self.n_in, self.kw, self.kw), dtype=torch.float32, + device=self.device, requires_grad=self.requires_grad) + w.normal_(std=1 / math.sqrt(self.n_in * self.kw ** 2)) + + b = torch.zeros((self.n_out,), dtype=torch.float32, device=self.device, + requires_grad=self.requires_grad) + self.w, self.b = nn.Parameter(w), nn.Parameter(b) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.use_float16 and 'cuda' in self.w.device.type: + if x.dtype != torch.float16: + x = x.half() + + w, b = self.w.half(), self.b.half() + else: + if x.dtype != torch.float32: + x = x.float() + + w, b = self.w, self.b + + return F.conv2d(x, w, b, padding=(self.kw - 1) // 2) + +def map_pixels(x: torch.Tensor) -> torch.Tensor: + if len(x.shape) != 4: + raise ValueError('expected input to be 4d') + if x.dtype != torch.float: + raise ValueError('expected input to have type float') + + return (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps + +def unmap_pixels(x: torch.Tensor) -> torch.Tensor: + if len(x.shape) != 4: + raise ValueError('expected input to be 4d') + if x.dtype != torch.float: + raise ValueError('expected input to have type float') + + return torch.clamp((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1) diff --git a/docs/src/model_card.md b/docs/src/model_card.md new file mode 100644 index 0000000..89ce211 --- /dev/null +++ b/docs/src/model_card.md @@ -0,0 +1,41 @@ +# Model Card: DALL·E dVAE + +Following [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993) and [Lessons from +Archives (Jo & Gebru)](https://arxiv.org/pdf/1912.10389.pdf), we're providing some information about about the discrete +VAE (dVAE) that was used to train DALL·E. + +## Model Details + +The dVAE was developed by researchers at OpenAI to reduce the memory footprint of the transformer trained on the +text-to-image generation task. The details involved in training the dVAE are described in [the paper][dalle_paper]. This +model card describes the first version of the model, released in February 2021. The model consists of a convolutional +encoder and decoder whose architectures are described [here](dall_e/encoder.py) and [here](dall_e/decoder.py), respectively. +For questions or comments about the models or the code release, please file a Github issue. + +## Model Use + +### Intended Use + +The model is intended for others to use for training their own generative models. + +### Out-of-Scope Use Cases + +This model is inappropriate for high-fidelity image processing applications. We also do not recommend its use as a +general-purpose image compressor. + +## Training Data + +The model was trained on publicly available text-image pairs collected from the internet. This data consists partly of +[Conceptual Captions][cc] and a filtered subset of [YFCC100M][yfcc100m]. We used a subset of the filters described in +[Sharma et al.][cc_paper] to construct this dataset; further details are described in [our paper][dalle_paper]. 
We will +not be releasing the dataset. + +## Performance and Limitations + +The heavy compression from the encoding process results in a noticeable loss of detail in the reconstructed images. This +renders it inappropriate for applications that require fine-grained details of the image to be preserved. + +[dalle_paper]: https://arxiv.org/abs/2102.12092 +[cc]: https://ai.google.com/research/ConceptualCaptions +[cc_paper]: https://www.aclweb.org/anthology/P18-1238/ +[yfcc100m]: http://projects.dfki.uni-kl.de/yfcc100m/ diff --git a/docs/src/notebooks/usage.py b/docs/src/notebooks/usage.py new file mode 100644 index 0000000..4a53248 --- /dev/null +++ b/docs/src/notebooks/usage.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import io +import os, sys +import requests +import PIL + +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as TF + +from dall_e import map_pixels, unmap_pixels, load_model +from IPython.display import display, display_markdown + +target_image_size = 256 + +def download_image(url): + resp = requests.get(url) + resp.raise_for_status() + return PIL.Image.open(io.BytesIO(resp.content)) + +def preprocess(img): + s = min(img.size) + + if s < target_image_size: + raise ValueError(f'min dim for image {s} < {target_image_size}') + + r = target_image_size / s + s = (round(r * img.size[1]), round(r * img.size[0])) + img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS) + img = TF.center_crop(img, output_size=2 * [target_image_size]) + img = torch.unsqueeze(T.ToTensor()(img), 0) + return map_pixels(img) + + +# In[ ]: + + +# This can be changed to a GPU, e.g. 'cuda:0'. +dev = torch.device('cpu') + +# For faster load times, download these files locally and use the local paths instead. 
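+# A minimal caching sketch, assuming the CDN URLs above remain reachable and
+# that a local ./models directory is writable (the directory name is only an
+# illustration):
+#
+#   import os, urllib.request
+#   os.makedirs('models', exist_ok=True)
+#   for name in ('encoder.pkl', 'decoder.pkl'):
+#       path = os.path.join('models', name)
+#       if not os.path.exists(path):
+#           urllib.request.urlretrieve(f'https://cdn.openai.com/dall-e/{name}', path)
+#   enc = load_model('models/encoder.pkl', dev)
+#   dec = load_model('models/decoder.pkl', dev)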
+enc = load_model("https://cdn.openai.com/dall-e/encoder.pkl", dev) +dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", dev) + + +# In[ ]: + + +x = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg')) +display_markdown('Original image:') +display(T.ToPILImage(mode='RGB')(x[0])) + + +# In[ ]: + + +import torch.nn.functional as F + +z_logits = enc(x) +z = torch.argmax(z_logits, axis=1) +z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float() + +x_stats = dec(z).float() +x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3])) +x_rec = T.ToPILImage(mode='RGB')(x_rec[0]) + +display_markdown('Reconstructed image:') +display(x_rec) + + +# In[ ]: + + + + diff --git a/docs/src/requirements.txt b/docs/src/requirements.txt new file mode 100644 index 0000000..bc5845f --- /dev/null +++ b/docs/src/requirements.txt @@ -0,0 +1,8 @@ +Pillow +blobfile +mypy +numpy +pytest +requests +torch +torchvision diff --git a/docs/src/setup.py b/docs/src/setup.py new file mode 100644 index 0000000..ce79bc9 --- /dev/null +++ b/docs/src/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup + +def parse_requirements(filename): + lines = (line.strip() for line in open(filename)) + return [line for line in lines if line and not line.startswith("#")] + +setup(name='DALL-E', + version='0.1', + description='PyTorch package for the discrete VAE used for DALL·E.', + url='http://github.com/openai/DALL-E', + author='Aditya Ramesh', + author_email='aramesh@openai.com', + license='BSD', + packages=['dall_e'], + install_requires=parse_requirements('requirements.txt'), + zip_safe=True) diff --git a/notebooks/usage.py b/notebooks/usage.py new file mode 100644 index 0000000..4a53248 --- /dev/null +++ b/notebooks/usage.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import io +import os, sys +import requests +import PIL + +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as TF + +from dall_e import map_pixels, unmap_pixels, load_model +from IPython.display import display, display_markdown + +target_image_size = 256 + +def download_image(url): + resp = requests.get(url) + resp.raise_for_status() + return PIL.Image.open(io.BytesIO(resp.content)) + +def preprocess(img): + s = min(img.size) + + if s < target_image_size: + raise ValueError(f'min dim for image {s} < {target_image_size}') + + r = target_image_size / s + s = (round(r * img.size[1]), round(r * img.size[0])) + img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS) + img = TF.center_crop(img, output_size=2 * [target_image_size]) + img = torch.unsqueeze(T.ToTensor()(img), 0) + return map_pixels(img) + + +# In[ ]: + + +# This can be changed to a GPU, e.g. 'cuda:0'. +dev = torch.device('cpu') + +# For faster load times, download these files locally and use the local paths instead. 
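+# The device above can also be picked automatically; a hedged sketch, assuming
+# a CUDA-enabled torch build when a GPU is present:
+#
+#   dev = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')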
+enc = load_model("https://cdn.openai.com/dall-e/encoder.pkl", dev) +dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", dev) + + +# In[ ]: + + +x = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg')) +display_markdown('Original image:') +display(T.ToPILImage(mode='RGB')(x[0])) + + +# In[ ]: + + +import torch.nn.functional as F + +z_logits = enc(x) +z = torch.argmax(z_logits, axis=1) +z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float() + +x_stats = dec(z).float() +x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3])) +x_rec = T.ToPILImage(mode='RGB')(x_rec[0]) + +display_markdown('Reconstructed image:') +display(x_rec) + + +# In[ ]: + + + + From 1c4a7361561e0e47c409ac1f5a351aecfb442b8f Mon Sep 17 00:00:00 2001 From: GitJournal Date: Wed, 10 Jan 2024 04:04:38 +0800 Subject: [PATCH 02/28] update1 --- docs/.gitignore | 1 + docs/codeview.html | 20 +- docs/github-markdown.css | 1197 ++++++++++++++++++++++++++++++++++++++ docs/index.html | 91 ++- docs/metadata.json | 4 +- docs/tree.html | 122 ++++ 6 files changed, 1407 insertions(+), 28 deletions(-) create mode 100644 docs/github-markdown.css create mode 100644 docs/tree.html diff --git a/docs/.gitignore b/docs/.gitignore index c6a61aa..07f4fe3 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -2,3 +2,4 @@ !* !*/* cache_db.json +cache_tree.json diff --git a/docs/codeview.html b/docs/codeview.html index 9e2cb49..4f40c39 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -388,7 +388,23 @@ const h1_element = document.getElementById('code-path'); // h1_element.textContent = code_path.slice('src/'.length); // debugger; - h1_element.textContent = project_name + "/" + code_path.slice('src/'.length); + // let mycodepath = project_name + "/" + code_path.slice('src/'.length); + // `?full=true#${manchor}` + let myhtml = `${project_name}`; + let myslices = code_path.slice('src/'.length).split('/') + // debugger; + let accpath = "" + for (let i in myslices) { + s = myslices[i]; + accpath += ("/" + s) + var maccpath = accpath + if (i != (myslices.length - 1)) { + maccpath = accpath + "/" + } + myhtml += `/${s}` + } + + h1_element.innerHTML = myhtml; pre_elem.className = `language-${language}` pre_elem.id = "mycode"; pre_elem.setAttribute("data-src", code_path); @@ -413,7 +429,7 @@ }()); + + + + + +
+

Project Structure of: openai/DALL-E

+
    +
  • DALL-E DALL-E: AI model, codebase, text to image, PyTorch, requirements.
      +
    • README.md DALL-E's VAE package, no transformer for text to image. Install with pip.
    • +
    • dall_e DALL-E: AI model, PyTorch decoder-encoder, neural encoder with residual paths.
        +
      • init.py Load DALL-E model from URL or local file.
      • +
      • decoder.py DALL-E: Decoder PyTorch Model, Encoder-Decoder, ConvLayers
      • +
      • encoder.py Encoder neural network blocks with residual paths.
      • +
      • utils.py DALL-E utilities: Conv2d class, scale, conv, pad.
      • +
      +
    • +
    • model_card.md DALL·E's dVAE: Reduces memory, loses fine details.
    • +
    • notebooks
        +
      • usage.py Image download, preprocessing, DALL-E model usage and display.
      • +
      +
    • +
    • requirements.txt Essential packages for codebase.
    • +
    • setup.py Setup Python package "DALL-E" with requirements from "requirements.txt".
    • +
    +
  • +
+
+ + + + \ No newline at end of file From c2f6e44efd624e37a671a777c06c49ef5a305c30 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Wed, 10 Jan 2024 05:54:17 +0800 Subject: [PATCH 03/28] update1 --- docs/index.html | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/docs/index.html b/docs/index.html index 9597a35..a46d767 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,5 +1,6 @@ + @@ -395,6 +396,7 @@

Document Index of: // const RESULT_LIMIT = 50; const progressOverlay = document.getElementById('progress-overlay'); const progressBar = document.querySelector('.progress'); + var isDebugMode = false; function navigateToPage(base_filepath, language_id, project_id, detail_filepath = "") { // Use a relative path to navigate to a specific page let page_param = "codeview.html"; @@ -580,6 +582,7 @@

Document Index of: const field = result.field; // "content" const ids = result.result; ids.forEach(id => { + id = id - 0 // to integer. const data_type = data[id].type; const file_id = data[id].file_id; if (searchResultItems[file_id] === undefined) { @@ -594,7 +597,11 @@

Document Index of: if (detail_types.indexOf(data_type) != -1) { const pair_id = ((data_type === "code") ? id : (id - 1)); if (searchResultItems[file_id].pairs[pair_id] === undefined) { - searchResultItems[file_id].pairs[pair_id] = { "left": data[pair_id].content, "right": data[pair_id - 1].content, "location": data[pair_id].location }; + // console.log(pair_id, pair_id+1, data_type, data[pair_id].location) + let mit = { "left": data[pair_id].content, "right": data[pair_id + 1].content, "location": data[pair_id].location } + // let mit = { "left": data[pair_id].content, "right": data[pair_id - 1].content, "location": data[pair_id].location } + // console.log(mit) + searchResultItems[file_id].pairs[pair_id] = mit; } } // listItem.textContent = `[${field}][${data_type}] ${data[id][field]}`; @@ -717,7 +724,20 @@

Document Index of: hasInputEvent = false; if (!isSearchRunning) { isSearchRunning = true; - searchInputEventListener(); + try { + searchInputEventListener(); + } catch (error) { + // Print exception info + console.error(error); + if (isDebugMode) { + // Show alert window in debug mode + console.log(error.message); + alert("An error occurred when searching: " + error.message); + } else { + // Raise exception when not in debug mode + throw error; + } + } isSearchRunning = false; } } @@ -747,6 +767,8 @@

Document Index of: const queryParams = getQueryParams(window.location.search); const query_from_url = queryParams.q; const file_path_from_url = queryParams.file; + isDebugMode = queryParams.debug == 'true'; + if (isDebugMode){console.log("You are in debug mode.")} if (query_from_url != null || query_from_url != undefined) { setTextAndTriggerInputEvent(query_from_url) } else if (file_path_from_url != null || file_path_from_url != undefined) { From fa4a1333abee1e139d394fe02da002029f20db35 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Wed, 10 Jan 2024 07:41:05 +0800 Subject: [PATCH 04/28] update1 --- docs/codeview.html | 5 ++++- docs/index.html | 36 +++++++++++++++++++++++++++++++----- docs/metadata.json | 2 +- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index 4f40c39..333e92a 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -465,8 +465,12 @@ /* Provide spacing to accommodate the fixed header */ /* Ensure the section fills the remaining viewport height */ overflow-y: auto; + /* white-space: pre-wrap; */ /* Enable vertical scrolling if content exceeds viewport height */ } + code { + white-space: pre-wrap !important; + } .container { display: flex; @@ -512,7 +516,6 @@ html, body { - margin-left: 5%; margin-right: 5%; } diff --git a/docs/index.html b/docs/index.html index a46d767..ccd2be6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -107,6 +107,14 @@ - + + +

Code Preview

-
+
-
+
\ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 9afde32..2254d8b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -247,7 +247,7 @@ display: flex; border: 1px solid #ccc; /* justify-content: space-between; */ - flex-direction: row; + /* flex-direction: row; */ height:min-content; /* align-items: flex-start; */ } From 7d510fb2b65e2e30c3cb6dbda5acec43394697e5 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 03:14:21 +0800 Subject: [PATCH 07/28] update --- docs/metadata.json | 2 +- docs/tree.html | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/metadata.json b/docs/metadata.json index 0086025..cc3060f 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -7,7 +7,7 @@ "0": { "filepath": "/README.md", "entry_id": 0, - "language_id": "markdown" + "language_id": "plain-text" }, "1": { "filepath": "/model_card.md", diff --git a/docs/tree.html b/docs/tree.html index 0169fe8..d79d023 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -74,21 +74,21 @@

Project Structure of: openai/DALL-E

  • DALL-E DALL-E: AI model, codebase, text to image, PyTorch, requirements.
      -
    • README.md DALL-E's VAE package, no transformer for text to image. Install with pip.
    • -
    • dall_e DALL-E: AI model, PyTorch decoder-encoder, neural encoder with residual paths.
        -
      • init.py Load DALL-E model from URL or local file.
      • -
      • decoder.py DALL-E: Decoder PyTorch Model, Encoder-Decoder, ConvLayers
      • -
      • encoder.py Encoder neural network blocks with residual paths.
      • -
      • utils.py DALL-E utilities: Conv2d class, scale, conv, pad.
      • +
      • README.md DALL-E's VAE package, no transformer for text to image. Install with pip.
      • +
      • dall_e DALL-E: AI model, PyTorch decoder-encoder, neural encoder with residual paths.
          +
        • __init__.py Load DALL-E model from URL or local file.
        • +
        • decoder.py DALL-E: Decoder PyTorch Model, Encoder-Decoder, ConvLayers
        • +
        • encoder.py Encoder neural network blocks with residual paths.
        • +
        • utils.py DALL-E utilities: Conv2d class, scale, conv, pad.
      • -
      • model_card.md DALL·E's dVAE: Reduces memory, loses fine details.
      • +
      • model_card.md DALL·E's dVAE: Reduces memory, loses fine details.
      • notebooks
          -
        • usage.py Image download, preprocessing, DALL-E model usage and display.
        • +
        • usage.py Image download, preprocessing, DALL-E model usage and display.
      • -
      • requirements.txt Essential packages for codebase.
      • -
      • setup.py Setup Python package "DALL-E" with requirements from "requirements.txt".
      • +
      • requirements.txt Essential packages for codebase.
      • +
      • setup.py Setup Python package "DALL-E" with requirements from "requirements.txt".
    From b4c94f858a94a580330884a03ed4806205087457 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 03:34:47 +0800 Subject: [PATCH 08/28] update --- docs/codeview.html | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index 7f30366..10bbd48 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -432,7 +432,32 @@ var xhr = new XMLHttpRequest(); xhr.open('GET', code_path, false); // The third parameter is set to false for synchronous request xhr.send(null); - code_elem.textContent = xhr.responseText; + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/main/docs/` + code_path + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/master/docs/` + code_path + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { code_elem.textContent = "Failed to load code." } + } + } + pre_elem.appendChild(code_elem); // pre_elem.setAttribute("data-src", code_path); section_elem.appendChild(pre_elem) @@ -584,11 +609,13 @@

    Code Preview

    -
    -
    - -
    + + \ No newline at end of file From 43b70df7f1be3f863b23943f1d61c267ac9a8059 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 14:41:49 +0800 Subject: [PATCH 09/28] update --- docs/codeview.html | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index 10bbd48..2cd9e4c 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -430,6 +430,8 @@ const code_elem = document.createElement('code'); code_elem.className = `language-${language}` var xhr = new XMLHttpRequest(); + console.log("trying: " + code_path) + xhr.open('GET', code_path, false); // The third parameter is set to false for synchronous request xhr.send(null); if (xhr.status == 200) { @@ -438,7 +440,10 @@ var xhr = new XMLHttpRequest(); // TODO: mitigate this evil hack by passing more info of the original project. + // the reason is that github does not allow accessing file with '_' as prefix. + // is that disabled for security reasons? var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/main/docs/` + code_path + console.log("trying: " + newLink) xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request xhr.send(null); @@ -454,7 +459,34 @@ if (xhr.status == 200) { code_elem.textContent = xhr.responseText; - } else { code_elem.textContent = "Failed to load code." } + } else { + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}_doc/master/` + code_path + console.log("trying: " + newLink) + + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}_doc/main/` + code_path + console.log("trying: " + newLink) + + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + + code_elem.textContent = "Failed to load code." 
+ } + } + } } } @@ -532,10 +564,9 @@ /* Enable vertical scrolling if content exceeds viewport height */ } - .line-highlight { + /* .line-highlight { background-color: rgba(228, 239, 12, 0.07) !important; - /* z-index:-10; */ - } + } */ /* do this after jump */ /* .line-highlight { From daddcfe9fa9761c2c5e1be06effcc46c34722fd2 Mon Sep 17 00:00:00 2001 From: GitJournal Date: Sat, 13 Jan 2024 17:10:01 +0800 Subject: [PATCH 10/28] update --- docs/cache_title.json | 1 + docs/codeview.html | 1 + docs/data/0.json | 250 ++++---- docs/data/titles/0.json | 25 + docs/index.html | 1177 +++++++++++++++++++++++++------------- docs/metadata.json | 40 +- docs/metadata_title.json | 1 + docs/tree.html | 38 +- 8 files changed, 975 insertions(+), 558 deletions(-) create mode 100644 docs/cache_title.json create mode 100644 docs/data/titles/0.json create mode 100644 docs/metadata_title.json diff --git a/docs/cache_title.json b/docs/cache_title.json new file mode 100644 index 0000000..717b3cc --- /dev/null +++ b/docs/cache_title.json @@ -0,0 +1 @@ +{"_default": {"1": {"path": "/README.md", "hash": "0c8cf0c67a41f4f8e7b16eacf23aa439", "title": "DALL-E's Discrete VAE PyTorch Package"}, "2": {"path": "/dall_e/__init__.py", "hash": "a2ad31846c456c5dd262ff3e610c7f51", "title": "DALL-E Model Loader"}, "3": {"path": "/dall_e/decoder.py", "hash": "167ae29ab96bf52a32f571768136de5d", "title": "DALL-E: Multigroup Convolutional Architecture"}, "4": {"path": "/dall_e/decoder.py:1-31", "hash": "d52e2cec072b3969e6e817669659da44", "title": "DecoderBlock: Neural Network Module for Convolutions and Identity Paths"}, "5": {"path": "/dall_e/decoder.py:32-54", "hash": "a3b5c6829b47300aa6f3624bd6eb809c", "title": "PyTorch Decoder Class: Forward Pass and Residual Paths"}, "6": {"path": "/dall_e/decoder.py:55-74", "hash": "6c24c98bf38b1371c2be2418bcffdd9c", "title": "DALL-E Neural Network Initialization"}, "7": {"path": "/dall_e/decoder.py:75-94", "hash": "1fbefc86698ab9fa845adfc466e70b1a", "title": "Encoder-Decoder Model in PyTorch"}, "8": {"path": "/dall_e/encoder.py", "hash": "6383eadd884947538edad2e37235b6e9", "title": "Dall-E Encoder Blocks and Neural Network Modules"}, "9": {"path": "/dall_e/encoder.py:1-31", "hash": "2fb8ace5910e0441c3bad2de24a6c2b7", "title": "EncoderBlock: Neural Network Encoder Module"}, "10": {"path": "/dall_e/encoder.py:32-56", "hash": "8841a8636148f5caf49e33de070385c8", "title": "Residual Convolutional Encoder Class"}, "11": {"path": "/dall_e/encoder.py:57-77", "hash": "1f78870eb5d37a07c99642df1f7b6e0a", "title": "Multi-Group Encoder Neural Network"}, "12": {"path": "/dall_e/encoder.py:77-93", "hash": "2f9d5fe05c8f88167b710c254b417574", "title": "Convolutional Neural Network Module"}, "13": {"path": "/dall_e/utils.py", "hash": "552e5e4523a9a66ef87593c354b6dd6b", "title": "Conv2d Class and Operations"}, "14": {"path": "/dall_e/utils.py:1-32", "hash": "9edfb4e0a1f4249a6b7a927dbfa229a6", "title": "Conv2D Layer Implementation"}, "15": {"path": "/dall_e/utils.py:33-59", "hash": "247e17199bc49bb2920c1c83bed30ca0", "title": "Convolution Utilities"}, "16": {"path": "/model_card.md", "hash": "39288813f85750d943afe91afe17537e", "title": "DVAE: Efficient but Fidelity Limited"}, "17": {"path": "/model_card.md:1-21", "hash": "30360226f34c4000ed480ee8a15aa968", "title": "Discrete VAE Model Card for DALL\u00b7E"}, "18": {"path": "/model_card.md:23-41", "hash": "72f3c5f49c0a78eb67d03c8bc49cd176", "title": "Unsuitable for High-Fidelity Image Compression"}, "19": {"path": "/notebooks/usage.py", "hash": 
"a32db587e530a6751718191a7f274a30", "title": "DALL-E Image Processing"}, "20": {"path": "/notebooks/usage.py:1-46", "hash": "3a117226a55d47b57fc635d41a59b0b1", "title": "Code for Image Processing and Downloads"}, "21": {"path": "/notebooks/usage.py:47-76", "hash": "f1662a832179b34fd852122af1bf2c19", "title": "Reconstruct DALL-E Images"}, "22": {"path": "/requirements.txt", "hash": "05a5350a9aee2a6a01bd692240cafbf2", "title": "Dependent Libraries for Codebase"}, "23": {"path": "/setup.py", "hash": "68de49c1050d99af666d83d3319c98e3", "title": "DALL-E: Discrete VAE PyTorch Package Setup"}}} \ No newline at end of file diff --git a/docs/codeview.html b/docs/codeview.html index 2cd9e4c..d0da4af 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -560,6 +560,7 @@ /* Provide spacing to accommodate the fixed header */ /* Ensure the section fills the remaining viewport height */ overflow-y: auto; + /* overflow-x:hidden; */ /* white-space: pre-wrap; */ /* Enable vertical scrolling if content exceeds viewport height */ } diff --git a/docs/data/0.json b/docs/data/0.json index 5b2c136..abe7c76 100644 --- a/docs/data/0.json +++ b/docs/data/0.json @@ -22,269 +22,269 @@ }, "4": { "file_id": 1, - "content": "/model_card.md", + "content": "/dall_e/__init__.py", "type": "filepath" }, "5": { "file_id": 1, - "content": "DALL·E's dVAE model by OpenAI reduces memory footprint but is unsuitable for high-fidelity image processing and general-purpose image compression due to loss of fine details.", + "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", "type": "summary" }, "6": { "file_id": 1, - "content": "# Model Card: DALL·E dVAE\nFollowing [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993) and [Lessons from\nArchives (Jo & Gebru)](https://arxiv.org/pdf/1912.10389.pdf), we're providing some information about about the discrete\nVAE (dVAE) that was used to train DALL·E.\n## Model Details\nThe dVAE was developed by researchers at OpenAI to reduce the memory footprint of the transformer trained on the\ntext-to-image generation task. The details involved in training the dVAE are described in [the paper][dalle_paper]. This\nmodel card describes the first version of the model, released in February 2021. 
The model consists of a convolutional\nencoder and decoder whose architectures are described [here](dall_e/encoder.py) and [here](dall_e/decoder.py), respectively.\nFor questions or comments about the models or the code release, please file a Github issue.\n## Model Use\n### Intended Use\nThe model is intended for others to use for training their own generative models.\n### Out-of-Scope Use Cases", + "content": "import io, requests\nimport torch\nimport torch.nn as nn\nfrom dall_e.encoder import Encoder\nfrom dall_e.decoder import Decoder\nfrom dall_e.utils import map_pixels, unmap_pixels\ndef load_model(path: str, device: torch.device = None) -> nn.Module:\n if path.startswith('http://') or path.startswith('https://'):\n resp = requests.get(path)\n resp.raise_for_status()\n with io.BytesIO(resp.content) as buf:\n return torch.load(buf, map_location=device)\n else:\n with open(path, 'rb') as f:\n return torch.load(f, map_location=device)", "type": "code", - "location": "/model_card.md:1-21" + "location": "/dall_e/__init__.py:1-18" }, "7": { "file_id": 1, - "content": "This is the model card for DALL·E's discrete VAE (dVAE), which was developed by OpenAI to reduce transformer memory footprint.", + "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", "type": "comment" }, "8": { - "file_id": 1, - "content": "This model is inappropriate for high-fidelity image processing applications. We also do not recommend its use as a\ngeneral-purpose image compressor.\n## Training Data\nThe model was trained on publicly available text-image pairs collected from the internet. This data consists partly of\n[Conceptual Captions][cc] and a filtered subset of [YFCC100M][yfcc100m]. We used a subset of the filters described in\n[Sharma et al.][cc_paper] to construct this dataset; further details are described in [our paper][dalle_paper]. We will\nnot be releasing the dataset.\n## Performance and Limitations\nThe heavy compression from the encoding process results in a noticeable loss of detail in the reconstructed images. This\nrenders it inappropriate for applications that require fine-grained details of the image to be preserved.\n[dalle_paper]: https://arxiv.org/abs/2102.12092\n[cc]: https://ai.google.com/research/ConceptualCaptions\n[cc_paper]: https://www.aclweb.org/anthology/P18-1238/\n[yfcc100m]: http://projects.dfki.uni-kl.de/yfcc100m/", - "type": "code", - "location": "/model_card.md:23-41" + "file_id": 2, + "content": "/dall_e/decoder.py", + "type": "filepath" }, "9": { - "file_id": 1, - "content": "The model is not suitable for high-fidelity image processing or general-purpose image compression. It was trained on a mix of Conceptual Captions and filtered YFCC100M datasets using specific filters, details in the paper. The dataset will not be released. Compression leads to loss of fine image details, making it unsuitable for applications requiring preserved details.", - "type": "comment" + "file_id": 2, + "content": "PyTorch models are described in both comments, with Comment A focusing on DALL-E's architecture involving multiple groups and partial functions for convolutional layers. 
In contrast, Comment B presents an encoder-decoder model using convolutional layers, residual connections, and ReLU activations.", + "type": "summary" }, "10": { "file_id": 2, - "content": "/requirements.txt", - "type": "filepath" + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass DecoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 1)),\n\t\t\t\t('relu_2', nn.ReLU()),", + "type": "code", + "location": "/dall_e/decoder.py:1-31" }, "11": { "file_id": 2, - "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", - "type": "summary" + "content": "This code defines a DecoderBlock class which is a neural network module. It takes input size (n_in), output size (n_out), number of layers (n_layers), device, and requires_grad as attributes. It initializes the hidden layer size (n_hid), post gain value, and makes convolution layers using partial function. The id_path is an identity path if n_in == n_out, otherwise it's a convolution layer. The res_path is a sequence of ReLU activations and convolution layers.", + "type": "comment" }, "12": { "file_id": 2, - "content": "Pillow\nblobfile\nmypy\nnumpy\npytest\nrequests\ntorch\ntorchvision", + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 3)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Decoder(nn.Module):\n\tgroup_count: int = 4\n\tn_init: int = attr.ib(default=128, validator=lambda i, a, x: x >= 8)\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\toutput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:", "type": "code", - "location": "/requirements.txt:1-8" + "location": "/dall_e/decoder.py:32-54" }, "13": { "file_id": 2, - "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "content": "This code defines a class called Decoder which is an instance of the nn.Module class in PyTorch. 
It has several attributes such as group_count, n_init, n_hid, n_blk_per_group, output_channels, vocab_size, device, requires_grad, and use_mixed_precision. The forward method is defined to compute the forward pass of the decoder network. It uses a combination of the id_path and res_path outputs, which are likely residual paths in the network. The make_conv function seems to be used to create convolutional layers with specified parameters.", "type": "comment" }, "14": { - "file_id": 3, - "content": "/setup.py", - "type": "filepath" + "file_id": 2, + "content": "\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)\n\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(DecoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.vocab_size, self.n_init, 1, use_float16=False)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(self.n_init if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(8 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],", + "type": "code", + "location": "/dall_e/decoder.py:55-74" }, "15": { - "file_id": 3, - "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", - "type": "summary" + "file_id": 2, + "content": "This code initializes a neural network for the DALL-E model, consisting of multiple groups with progressively smaller block sizes. It uses partial functions to create convolutional layers and blocks. 
The input is fed through a series of upsampling and convolution operations in each group before being processed by the final output layer.", + "type": "comment" }, "16": { - "file_id": 3, - "content": "from setuptools import setup\ndef parse_requirements(filename):\n\tlines = (line.strip() for line in open(filename))\n\treturn [line for line in lines if line and not line.startswith(\"#\")]\nsetup(name='DALL-E',\n version='0.1',\n description='PyTorch package for the discrete VAE used for DALL·E.',\n url='http://github.com/openai/DALL-E',\n author='Aditya Ramesh',\n author_email='aramesh@openai.com',\n license='BSD',\n packages=['dall_e'],\n install_requires=parse_requirements('requirements.txt'),\n zip_safe=True)", + "file_id": 2, + "content": "\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(1 * self.n_hid, 2 * self.output_channels, 1)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.vocab_size:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.vocab_size}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", "type": "code", - "location": "/setup.py:1-16" + "location": "/dall_e/decoder.py:75-94" }, "17": { - "file_id": 3, - "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "file_id": 2, + "content": "This code defines a class for an encoder-decoder model in PyTorch. The forward function takes in an input tensor and passes it through multiple blocks before returning the output tensor. The model consists of convolutional layers, residual connections, and ReLU activations to process input data.", "type": "comment" }, "18": { - "file_id": 4, - "content": "/dall_e/__init__.py", + "file_id": 3, + "content": "/dall_e/encoder.py", "type": "filepath" }, "19": { - "file_id": 4, - "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "file_id": 3, + "content": "The code defines two classes, \"EncoderBlock\" and \"Encoder\", for neural network modules with 4 groups of 2 blocks each, using residual paths. 
It encodes input, performs max pooling, checks errors, and has various attributes for computation.", "type": "summary" }, "20": { - "file_id": 4, - "content": "import io, requests\nimport torch\nimport torch.nn as nn\nfrom dall_e.encoder import Encoder\nfrom dall_e.decoder import Decoder\nfrom dall_e.utils import map_pixels, unmap_pixels\ndef load_model(path: str, device: torch.device = None) -> nn.Module:\n if path.startswith('http://') or path.startswith('https://'):\n resp = requests.get(path)\n resp.raise_for_status()\n with io.BytesIO(resp.content) as buf:\n return torch.load(buf, map_location=device)\n else:\n with open(path, 'rb') as f:\n return torch.load(f, map_location=device)", + "file_id": 3, + "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass EncoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 3)),\n\t\t\t\t('relu_2', nn.ReLU()),", "type": "code", - "location": "/dall_e/__init__.py:1-18" + "location": "/dall_e/encoder.py:1-31" }, "21": { - "file_id": 4, - "content": "The code imports necessary libraries, defines a function load_model that loads the DALL-E model from a given path, and handles loading the model from either URL or local file.", + "file_id": 3, + "content": "This code defines a class named \"EncoderBlock\" which is a module for an encoder block in the neural network. It takes input parameters such as the number of input features (n_in), output features (n_out), and layers (n_layers). The module also has properties like device, requires_grad, and initializes instance variables n_hid, post_gain. 
It uses a partial function to make a convolution layer and creates an identity path and residual path for the encoder block.", "type": "comment" }, "22": { - "file_id": 5, - "content": "/dall_e/decoder.py", - "type": "filepath" + "file_id": 3, + "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 1)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Encoder(nn.Module):\n\tgroup_count: int = 4\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\tinput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)", + "type": "code", + "location": "/dall_e/encoder.py:32-56" }, "23": { - "file_id": 5, - "content": "PyTorch models are described in both comments, with Comment A focusing on DALL-E's architecture involving multiple groups and partial functions for convolutional layers. In contrast, Comment B presents an encoder-decoder model using convolutional layers, residual connections, and ReLU activations.", - "type": "summary" + "file_id": 3, + "content": "This code defines a class called \"Encoder\" which is a type of neural network module. It has 4 groups, each group containing 2 blocks of convolutional layers and activation functions. The input is passed through the identity path and the residual path, then their sum is returned as output. 
The Encoder class also has several attributes such as number of hidden units, number of block per group, input channels, vocabulary size, device to run on, requires gradient computation, and use mixed precision.", + "type": "comment" }, "24": { - "file_id": 5, - "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass DecoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 1)),\n\t\t\t\t('relu_2', nn.ReLU()),", + "file_id": 3, + "content": "\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(EncoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.input_channels, 1 * self.n_hid, 7)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t", "type": "code", - "location": "/dall_e/decoder.py:1-31" + "location": "/dall_e/encoder.py:57-77" }, "25": { - "file_id": 5, - "content": "This code defines a DecoderBlock class which is a neural network module. It takes input size (n_in), output size (n_out), number of layers (n_layers), device, and requires_grad as attributes. It initializes the hidden layer size (n_hid), post gain value, and makes convolution layers using partial function. The id_path is an identity path if n_in == n_out, otherwise it's a convolution layer. The res_path is a sequence of ReLU activations and convolution layers.", + "file_id": 3, + "content": "This code is creating a neural network encoder with multiple blocks. It consists of four groups, each with different number of layers and hidden size. Each group has a series of EncoderBlocks followed by a max pooling operation. 
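As a hedged example (again assuming dall_e is installed), the Encoder can be instantiated with those attributes and probed with a dummy batch; n_hid=64 is used here only to keep the sketch small, whereas the released model uses 256:

import torch
from dall_e.encoder import Encoder

enc = Encoder(n_hid=64, n_blk_per_group=2, input_channels=3, vocab_size=8192,
              device=torch.device('cpu'), requires_grad=False)
x = torch.zeros(1, 3, 256, 256, dtype=torch.float32)   # dummy RGB batch
print(enc(x).shape)   # torch.Size([1, 8192, 32, 32]): per-position logits over the codebook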
The input channel size is defined based on the current block and group configuration.", "type": "comment" }, "26": { - "file_id": 5, - "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 3)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Decoder(nn.Module):\n\tgroup_count: int = 4\n\tn_init: int = attr.ib(default=128, validator=lambda i, a, x: x >= 8)\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\toutput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:", + "file_id": 3, + "content": "*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(8 * self.n_hid, self.vocab_size, 1, use_float16=False)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.input_channels:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.input_channels}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", "type": "code", - "location": "/dall_e/decoder.py:32-54" + "location": "/dall_e/encoder.py:77-93" }, "27": { - "file_id": 5, - "content": "This code defines a class called Decoder which is an instance of the nn.Module class in PyTorch. It has several attributes such as group_count, n_init, n_hid, n_blk_per_group, output_channels, vocab_size, device, requires_grad, and use_mixed_precision. The forward method is defined to compute the forward pass of the decoder network. It uses a combination of the id_path and res_path outputs, which are likely residual paths in the network. The make_conv function seems to be used to create convolutional layers with specified parameters.", + "file_id": 3, + "content": "The code defines a neural network module that takes an input tensor and passes it through multiple blocks of convolutional layers. The output is then processed by another set of convolutional layers before returning the final result. 
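The channel widths and spatial sizes implied by that grouping can be traced with a few lines of plain Python (assuming n_hid=256 and a 256x256 input, as in the released model):

n_hid, size = 256, 256
widths = []
for group, mult in enumerate((1, 2, 4, 8), start=1):
    widths.append(mult * n_hid)   # channel width used by this group's blocks
    if group < 4:                 # groups 1-3 end in MaxPool2d(kernel_size=2); group 4 does not
        size //= 2
print(widths, size)               # [256, 512, 1024, 2048] 32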
The function also includes error checks for the shape, number of channels, and data type of the input tensor to ensure proper functioning.", "type": "comment" }, "28": { - "file_id": 5, - "content": "\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)\n\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(DecoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.vocab_size, self.n_init, 1, use_float16=False)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(self.n_init if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(8 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],", - "type": "code", - "location": "/dall_e/decoder.py:55-74" + "file_id": 4, + "content": "/dall_e/utils.py", + "type": "filepath" }, "29": { - "file_id": 5, - "content": "This code initializes a neural network for the DALL-E model, consisting of multiple groups with progressively smaller block sizes. It uses partial functions to create convolutional layers and blocks. The input is fed through a series of upsampling and convolution operations in each group before being processed by the final output layer.", - "type": "comment" + "file_id": 4, + "content": "The code defines a Conv2d class with attributes and initializes weights and biases. 
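Restated as a standalone helper for illustration only (in the source these checks live inside Encoder.forward):

import torch

def validate_encoder_input(x: torch.Tensor, input_channels: int = 3) -> None:
    if x.ndim != 4:
        raise ValueError(f'input shape {tuple(x.shape)} is not 4d')
    if x.shape[1] != input_channels:
        raise ValueError(f'input has {x.shape[1]} channels but model built for {input_channels}')
    if x.dtype != torch.float32:
        raise ValueError('input must have dtype torch.float32')

validate_encoder_input(torch.zeros(1, 3, 256, 256))   # passes silently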
It also includes three functions, `map_pixels`, `unmap_pixels`, and `conv2d`, for scaling, convolution operation, and padding based on kernel width.", + "type": "summary" }, "30": { - "file_id": 5, - "content": "\t\t\t\t('upsample', nn.Upsample(scale_factor=2, mode='nearest')),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(1 * self.n_hid, 2 * self.output_channels, 1)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.vocab_size:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.vocab_size}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", + "file_id": 4, + "content": "import attr\nimport math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nlogit_laplace_eps: float = 0.1\n@attr.s(eq=False)\nclass Conv2d(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tkw: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 2 == 1)\n\tuse_float16: bool = attr.ib(default=True)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tw = torch.empty((self.n_out, self.n_in, self.kw, self.kw), dtype=torch.float32,\n\t\t\tdevice=self.device, requires_grad=self.requires_grad)\n\t\tw.normal_(std=1 / math.sqrt(self.n_in * self.kw ** 2))\n\t\tb = torch.zeros((self.n_out,), dtype=torch.float32, device=self.device,\n\t\t\trequires_grad=self.requires_grad)\n\t\tself.w, self.b = nn.Parameter(w), nn.Parameter(b)\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif self.use_float16 and 'cuda' in self.w.device.type:", "type": "code", - "location": "/dall_e/decoder.py:75-94" + "location": "/dall_e/utils.py:1-32" }, "31": { - "file_id": 5, - "content": "This code defines a class for an encoder-decoder model in PyTorch. The forward function takes in an input tensor and passes it through multiple blocks before returning the output tensor. The model consists of convolutional layers, residual connections, and ReLU activations to process input data.", + "file_id": 4, + "content": "This code defines a Conv2d class that extends torch.nn.Module and implements a 2D convolutional layer using the nn.Conv2d module from PyTorch. The class has several attributes including number of input channels (n_in), number of output channels (n_out), kernel width (kw), use_float16 for whether to use float16 or float32, device for tensor storage location, and requires_grad for whether the parameters should be tracked during backpropagation. The class initializes the weight matrix (self.w) with normal distribution and bias (self.b) as zeros. 
It also has a forward method that applies the convolution operation on the input tensor (x).", "type": "comment" }, "32": { - "file_id": 6, - "content": "/dall_e/encoder.py", - "type": "filepath" + "file_id": 4, + "content": "\t\t\tif x.dtype != torch.float16:\n\t\t\t\tx = x.half()\n\t\t\tw, b = self.w.half(), self.b.half()\n\t\telse:\n\t\t\tif x.dtype != torch.float32:\n\t\t\t\tx = x.float()\n\t\t\tw, b = self.w, self.b\n\t\treturn F.conv2d(x, w, b, padding=(self.kw - 1) // 2)\ndef map_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps\ndef unmap_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn torch.clamp((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1)", + "type": "code", + "location": "/dall_e/utils.py:33-59" }, "33": { - "file_id": 6, - "content": "The code defines two classes, \"EncoderBlock\" and \"Encoder\", for neural network modules with 4 groups of 2 blocks each, using residual paths. It encodes input, performs max pooling, checks errors, and has various attributes for computation.", - "type": "summary" + "file_id": 4, + "content": "The code defines three functions: `map_pixels`, `unmap_pixels`, and `conv2d`. The `map_pixels` function scales the input tensor by a factor and adds a constant to it. It also checks if the input tensor is 4-dimensional and has the correct data type (float). Similarly, the `unmap_pixels` function scales and shifts the input tensor, and ensures the correct dimensions and data type. The `conv2d` function applies a convolution operation on the input tensor with specified weights and biases, and handles the padding based on the kernel width.", + "type": "comment" }, "34": { - "file_id": 6, - "content": "import attr\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom collections import OrderedDict\nfrom functools import partial\nfrom dall_e.utils import Conv2d\n@attr.s(eq=False, repr=False)\nclass EncoderBlock(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 4 ==0)\n\tn_layers: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tdevice: torch.device = attr.ib(default=None)\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tself.n_hid = self.n_out // 4\n\t\tself.post_gain = 1 / (self.n_layers ** 2)\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tself.id_path = make_conv(self.n_in, self.n_out, 1) if self.n_in != self.n_out else nn.Identity()\n\t\tself.res_path = nn.Sequential(OrderedDict([\n\t\t\t\t('relu_1', nn.ReLU()),\n\t\t\t\t('conv_1', make_conv(self.n_in, self.n_hid, 3)),\n\t\t\t\t('relu_2', nn.ReLU()),", - "type": "code", - "location": "/dall_e/encoder.py:1-31" + "file_id": 5, + "content": "/model_card.md", + "type": "filepath" }, "35": { - "file_id": 6, - "content": "This code defines a class named \"EncoderBlock\" which is a module for an encoder block in the neural network. It takes input parameters such as the number of input features (n_in), output features (n_out), and layers (n_layers). 
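A short sketch of those utilities in use (assumes dall_e is installed): map_pixels and unmap_pixels invert each other on [0, 1], and Conv2d pads by (kw - 1) // 2 so the spatial size is preserved:

import torch
from dall_e.utils import Conv2d, map_pixels, unmap_pixels

x = torch.rand(1, 3, 64, 64)
assert torch.allclose(unmap_pixels(map_pixels(x)), x, atol=1e-6)

conv = Conv2d(3, 8, 7)   # kernel width 7 gives padding 3, so 64x64 stays 64x64
print(conv(x).shape)     # torch.Size([1, 8, 64, 64])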
The module also has properties like device, requires_grad, and initializes instance variables n_hid, post_gain. It uses a partial function to make a convolution layer and creates an identity path and residual path for the encoder block.", - "type": "comment" + "file_id": 5, + "content": "DALL·E's dVAE model by OpenAI reduces memory footprint but is unsuitable for high-fidelity image processing and general-purpose image compression due to loss of fine details.", + "type": "summary" }, "36": { - "file_id": 6, - "content": "\t\t\t\t('conv_2', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_3', nn.ReLU()),\n\t\t\t\t('conv_3', make_conv(self.n_hid, self.n_hid, 3)),\n\t\t\t\t('relu_4', nn.ReLU()),\n\t\t\t\t('conv_4', make_conv(self.n_hid, self.n_out, 1)),]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\treturn self.id_path(x) + self.post_gain * self.res_path(x)\n@attr.s(eq=False, repr=False)\nclass Encoder(nn.Module):\n\tgroup_count: int = 4\n\tn_hid: int = attr.ib(default=256, validator=lambda i, a, x: x >= 64)\n\tn_blk_per_group: int = attr.ib(default=2, validator=lambda i, a, x: x >= 1)\n\tinput_channels: int = attr.ib(default=3, validator=lambda i, a, x: x >= 1)\n\tvocab_size: int = attr.ib(default=8192, validator=lambda i, a, x: x >= 512)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tuse_mixed_precision: bool = attr.ib(default=True)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tblk_range = range(self.n_blk_per_group)", + "file_id": 5, + "content": "# Model Card: DALL·E dVAE\nFollowing [Model Cards for Model Reporting (Mitchell et al.)](https://arxiv.org/abs/1810.03993) and [Lessons from\nArchives (Jo & Gebru)](https://arxiv.org/pdf/1912.10389.pdf), we're providing some information about about the discrete\nVAE (dVAE) that was used to train DALL·E.\n## Model Details\nThe dVAE was developed by researchers at OpenAI to reduce the memory footprint of the transformer trained on the\ntext-to-image generation task. The details involved in training the dVAE are described in [the paper][dalle_paper]. This\nmodel card describes the first version of the model, released in February 2021. The model consists of a convolutional\nencoder and decoder whose architectures are described [here](dall_e/encoder.py) and [here](dall_e/decoder.py), respectively.\nFor questions or comments about the models or the code release, please file a Github issue.\n## Model Use\n### Intended Use\nThe model is intended for others to use for training their own generative models.\n### Out-of-Scope Use Cases", "type": "code", - "location": "/dall_e/encoder.py:32-56" + "location": "/model_card.md:1-21" }, "37": { - "file_id": 6, - "content": "This code defines a class called \"Encoder\" which is a type of neural network module. It has 4 groups, each group containing 2 blocks of convolutional layers and activation functions. The input is passed through the identity path and the residual path, then their sum is returned as output. 
The Encoder class also has several attributes such as number of hidden units, number of block per group, input channels, vocabulary size, device to run on, requires gradient computation, and use mixed precision.", + "file_id": 5, + "content": "This is the model card for DALL·E's discrete VAE (dVAE), which was developed by OpenAI to reduce transformer memory footprint.", "type": "comment" }, "38": { - "file_id": 6, - "content": "\t\tn_layers = self.group_count * self.n_blk_per_group\n\t\tmake_conv = partial(Conv2d, device=self.device, requires_grad=self.requires_grad)\n\t\tmake_blk = partial(EncoderBlock, n_layers=n_layers, device=self.device,\n\t\t\t\trequires_grad=self.requires_grad)\n\t\tself.blocks = nn.Sequential(OrderedDict([\n\t\t\t('input', make_conv(self.input_channels, 1 * self.n_hid, 7)),\n\t\t\t('group_1', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid, 1 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_2', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(1 * self.n_hid if i == 0 else 2 * self.n_hid, 2 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_3', nn.Sequential(OrderedDict([\n\t\t\t\t*[(f'block_{i + 1}', make_blk(2 * self.n_hid if i == 0 else 4 * self.n_hid, 4 * self.n_hid)) for i in blk_range],\n\t\t\t\t('pool', nn.MaxPool2d(kernel_size=2)),\n\t\t\t]))),\n\t\t\t('group_4', nn.Sequential(OrderedDict([\n\t\t\t\t", + "file_id": 5, + "content": "This model is inappropriate for high-fidelity image processing applications. We also do not recommend its use as a\ngeneral-purpose image compressor.\n## Training Data\nThe model was trained on publicly available text-image pairs collected from the internet. This data consists partly of\n[Conceptual Captions][cc] and a filtered subset of [YFCC100M][yfcc100m]. We used a subset of the filters described in\n[Sharma et al.][cc_paper] to construct this dataset; further details are described in [our paper][dalle_paper]. We will\nnot be releasing the dataset.\n## Performance and Limitations\nThe heavy compression from the encoding process results in a noticeable loss of detail in the reconstructed images. This\nrenders it inappropriate for applications that require fine-grained details of the image to be preserved.\n[dalle_paper]: https://arxiv.org/abs/2102.12092\n[cc]: https://ai.google.com/research/ConceptualCaptions\n[cc_paper]: https://www.aclweb.org/anthology/P18-1238/\n[yfcc100m]: http://projects.dfki.uni-kl.de/yfcc100m/", "type": "code", - "location": "/dall_e/encoder.py:57-77" + "location": "/model_card.md:23-41" }, "39": { - "file_id": 6, - "content": "This code is creating a neural network encoder with multiple blocks. It consists of four groups, each with different number of layers and hidden size. Each group has a series of EncoderBlocks followed by a max pooling operation. The input channel size is defined based on the current block and group configuration.", + "file_id": 5, + "content": "The model is not suitable for high-fidelity image processing or general-purpose image compression. It was trained on a mix of Conceptual Captions and filtered YFCC100M datasets using specific filters, details in the paper. The dataset will not be released. 
Compression leads to loss of fine image details, making it unsuitable for applications requiring preserved details.", "type": "comment" }, "40": { "file_id": 6, - "content": "*[(f'block_{i + 1}', make_blk(4 * self.n_hid if i == 0 else 8 * self.n_hid, 8 * self.n_hid)) for i in blk_range],\n\t\t\t]))),\n\t\t\t('output', nn.Sequential(OrderedDict([\n\t\t\t\t('relu', nn.ReLU()),\n\t\t\t\t('conv', make_conv(8 * self.n_hid, self.vocab_size, 1, use_float16=False)),\n\t\t\t]))),\n\t\t]))\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif len(x.shape) != 4:\n\t\t\traise ValueError(f'input shape {x.shape} is not 4d')\n\t\tif x.shape[1] != self.input_channels:\n\t\t\traise ValueError(f'input has {x.shape[1]} channels but model built for {self.input_channels}')\n\t\tif x.dtype != torch.float32:\n\t\t\traise ValueError('input must have dtype torch.float32')\n\t\treturn self.blocks(x)", - "type": "code", - "location": "/dall_e/encoder.py:77-93" + "content": "/notebooks/usage.py", + "type": "filepath" }, "41": { "file_id": 6, - "content": "The code defines a neural network module that takes an input tensor and passes it through multiple blocks of convolutional layers. The output is then processed by another set of convolutional layers before returning the final result. The function also includes error checks for the shape, number of channels, and data type of the input tensor to ensure proper functioning.", - "type": "comment" + "content": "The code imports libraries, defines functions for image downloading and preprocessing, sets the target size, loads DALL-E models, processes an image, reconstructs it using the models, and displays both images.", + "type": "summary" }, "42": { - "file_id": 7, - "content": "/dall_e/utils.py", - "type": "filepath" + "file_id": 6, + "content": "#!/usr/bin/env python\n# coding: utf-8\n# In[ ]:\nimport io\nimport os, sys\nimport requests\nimport PIL\nimport torch\nimport torchvision.transforms as T\nimport torchvision.transforms.functional as TF\nfrom dall_e import map_pixels, unmap_pixels, load_model\nfrom IPython.display import display, display_markdown\ntarget_image_size = 256\ndef download_image(url):\n resp = requests.get(url)\n resp.raise_for_status()\n return PIL.Image.open(io.BytesIO(resp.content))\ndef preprocess(img):\n s = min(img.size)\n if s < target_image_size:\n raise ValueError(f'min dim for image {s} < {target_image_size}')\n r = target_image_size / s\n s = (round(r * img.size[1]), round(r * img.size[0]))\n img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n img = TF.center_crop(img, output_size=2 * [target_image_size])\n img = torch.unsqueeze(T.ToTensor()(img), 0)\n return map_pixels(img)\n# In[ ]:\n# This can be changed to a GPU, e.g. 'cuda:0'.\ndev = torch.device('cpu')\n# For faster load times, download these files locally and use the local paths instead.", + "type": "code", + "location": "/notebooks/usage.py:1-46" }, "43": { - "file_id": 7, - "content": "The code defines a Conv2d class with attributes and initializes weights and biases. 
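A back-of-the-envelope calculation (not from the model card) of why fine detail is lost: a 256x256 RGB image is represented by a 32x32 grid of tokens drawn from an 8192-entry codebook:

import math

pixel_bits = 256 * 256 * 3 * 8           # raw 8-bit RGB image
token_bits = 32 * 32 * math.log2(8192)   # 1024 tokens x 13 bits each
print(round(pixel_bits / token_bits))    # ~118x fewer bits, hence the dropped fine detail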
It also includes three functions, `map_pixels`, `unmap_pixels`, and `conv2d`, for scaling, convolution operation, and padding based on kernel width.", - "type": "summary" + "file_id": 6, + "content": "This code imports necessary libraries, defines functions for downloading and preprocessing images, sets the target image size, and specifies the device (CPU) to be used.", + "type": "comment" }, "44": { - "file_id": 7, - "content": "import attr\nimport math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nlogit_laplace_eps: float = 0.1\n@attr.s(eq=False)\nclass Conv2d(nn.Module):\n\tn_in: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tn_out: int = attr.ib(validator=lambda i, a, x: x >= 1)\n\tkw: int = attr.ib(validator=lambda i, a, x: x >= 1 and x % 2 == 1)\n\tuse_float16: bool = attr.ib(default=True)\n\tdevice: torch.device = attr.ib(default=torch.device('cpu'))\n\trequires_grad: bool = attr.ib(default=False)\n\tdef __attrs_post_init__(self) -> None:\n\t\tsuper().__init__()\n\t\tw = torch.empty((self.n_out, self.n_in, self.kw, self.kw), dtype=torch.float32,\n\t\t\tdevice=self.device, requires_grad=self.requires_grad)\n\t\tw.normal_(std=1 / math.sqrt(self.n_in * self.kw ** 2))\n\t\tb = torch.zeros((self.n_out,), dtype=torch.float32, device=self.device,\n\t\t\trequires_grad=self.requires_grad)\n\t\tself.w, self.b = nn.Parameter(w), nn.Parameter(b)\n\tdef forward(self, x: torch.Tensor) -> torch.Tensor:\n\t\tif self.use_float16 and 'cuda' in self.w.device.type:", + "file_id": 6, + "content": "enc = load_model(\"https://cdn.openai.com/dall-e/encoder.pkl\", dev)\ndec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", dev)\n# In[ ]:\nx = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))\ndisplay_markdown('Original image:')\ndisplay(T.ToPILImage(mode='RGB')(x[0]))\n# In[ ]:\nimport torch.nn.functional as F\nz_logits = enc(x)\nz = torch.argmax(z_logits, axis=1)\nz = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()\nx_stats = dec(z).float()\nx_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))\nx_rec = T.ToPILImage(mode='RGB')(x_rec[0])\ndisplay_markdown('Reconstructed image:')\ndisplay(x_rec)\n# In[ ]:", "type": "code", - "location": "/dall_e/utils.py:1-32" + "location": "/notebooks/usage.py:47-76" }, "45": { - "file_id": 7, - "content": "This code defines a Conv2d class that extends torch.nn.Module and implements a 2D convolutional layer using the nn.Conv2d module from PyTorch. The class has several attributes including number of input channels (n_in), number of output channels (n_out), kernel width (kw), use_float16 for whether to use float16 or float32, device for tensor storage location, and requires_grad for whether the parameters should be tracked during backpropagation. The class initializes the weight matrix (self.w) with normal distribution and bias (self.b) as zeros. 
It also has a forward method that applies the convolution operation on the input tensor (x).", + "file_id": 6, + "content": "This code loads the DALL-E encoder and decoder models, preprocesses an image, reconstructs it using the models, and displays both the original and reconstructed images.", "type": "comment" }, "46": { "file_id": 7, - "content": "\t\t\tif x.dtype != torch.float16:\n\t\t\t\tx = x.half()\n\t\t\tw, b = self.w.half(), self.b.half()\n\t\telse:\n\t\t\tif x.dtype != torch.float32:\n\t\t\t\tx = x.float()\n\t\t\tw, b = self.w, self.b\n\t\treturn F.conv2d(x, w, b, padding=(self.kw - 1) // 2)\ndef map_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn (1 - 2 * logit_laplace_eps) * x + logit_laplace_eps\ndef unmap_pixels(x: torch.Tensor) -> torch.Tensor:\n\tif len(x.shape) != 4:\n\t\traise ValueError('expected input to be 4d')\n\tif x.dtype != torch.float:\n\t\traise ValueError('expected input to have type float')\n\treturn torch.clamp((x - logit_laplace_eps) / (1 - 2 * logit_laplace_eps), 0, 1)", - "type": "code", - "location": "/dall_e/utils.py:33-59" + "content": "/requirements.txt", + "type": "filepath" }, "47": { "file_id": 7, - "content": "The code defines three functions: `map_pixels`, `unmap_pixels`, and `conv2d`. The `map_pixels` function scales the input tensor by a factor and adds a constant to it. It also checks if the input tensor is 4-dimensional and has the correct data type (float). Similarly, the `unmap_pixels` function scales and shifts the input tensor, and ensures the correct dimensions and data type. The `conv2d` function applies a convolution operation on the input tensor with specified weights and biases, and handles the padding based on the kernel width.", - "type": "comment" + "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "type": "summary" }, "48": { - "file_id": 8, - "content": "/notebooks/usage.py", - "type": "filepath" + "file_id": 7, + "content": "Pillow\nblobfile\nmypy\nnumpy\npytest\nrequests\ntorch\ntorchvision", + "type": "code", + "location": "/requirements.txt:1-8" }, "49": { - "file_id": 8, - "content": "The code imports libraries, defines functions for image downloading and preprocessing, sets the target size, loads DALL-E models, processes an image, reconstructs it using the models, and displays both images.", - "type": "summary" + "file_id": 7, + "content": "Installed packages for codebase: Pillow, blobfile, mypy, numpy, pytest, requests, torch, torchvision.", + "type": "comment" }, "50": { "file_id": 8, - "content": "#!/usr/bin/env python\n# coding: utf-8\n# In[ ]:\nimport io\nimport os, sys\nimport requests\nimport PIL\nimport torch\nimport torchvision.transforms as T\nimport torchvision.transforms.functional as TF\nfrom dall_e import map_pixels, unmap_pixels, load_model\nfrom IPython.display import display, display_markdown\ntarget_image_size = 256\ndef download_image(url):\n resp = requests.get(url)\n resp.raise_for_status()\n return PIL.Image.open(io.BytesIO(resp.content))\ndef preprocess(img):\n s = min(img.size)\n if s < target_image_size:\n raise ValueError(f'min dim for image {s} < {target_image_size}')\n r = target_image_size / s\n s = (round(r * img.size[1]), round(r * img.size[0]))\n img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n img = TF.center_crop(img, output_size=2 * 
[target_image_size])\n img = torch.unsqueeze(T.ToTensor()(img), 0)\n return map_pixels(img)\n# In[ ]:\n# This can be changed to a GPU, e.g. 'cuda:0'.\ndev = torch.device('cpu')\n# For faster load times, download these files locally and use the local paths instead.", - "type": "code", - "location": "/notebooks/usage.py:1-46" + "content": "/setup.py", + "type": "filepath" }, "51": { "file_id": 8, - "content": "This code imports necessary libraries, defines functions for downloading and preprocessing images, sets the target image size, and specifies the device (CPU) to be used.", - "type": "comment" + "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", + "type": "summary" }, "52": { "file_id": 8, - "content": "enc = load_model(\"https://cdn.openai.com/dall-e/encoder.pkl\", dev)\ndec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", dev)\n# In[ ]:\nx = preprocess(download_image('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))\ndisplay_markdown('Original image:')\ndisplay(T.ToPILImage(mode='RGB')(x[0]))\n# In[ ]:\nimport torch.nn.functional as F\nz_logits = enc(x)\nz = torch.argmax(z_logits, axis=1)\nz = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()\nx_stats = dec(z).float()\nx_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))\nx_rec = T.ToPILImage(mode='RGB')(x_rec[0])\ndisplay_markdown('Reconstructed image:')\ndisplay(x_rec)\n# In[ ]:", + "content": "from setuptools import setup\ndef parse_requirements(filename):\n\tlines = (line.strip() for line in open(filename))\n\treturn [line for line in lines if line and not line.startswith(\"#\")]\nsetup(name='DALL-E',\n version='0.1',\n description='PyTorch package for the discrete VAE used for DALL·E.',\n url='http://github.com/openai/DALL-E',\n author='Aditya Ramesh',\n author_email='aramesh@openai.com',\n license='BSD',\n packages=['dall_e'],\n install_requires=parse_requirements('requirements.txt'),\n zip_safe=True)", "type": "code", - "location": "/notebooks/usage.py:47-76" + "location": "/setup.py:1-16" }, "53": { "file_id": 8, - "content": "This code loads the DALL-E encoder and decoder models, preprocesses an image, reconstructs it using the models, and displays both the original and reconstructed images.", + "content": "This code sets up a Python package named \"DALL-E\" using the setuptools module. 
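The reconstruction loop from the notebook, condensed into a hedged sketch; it assumes the released checkpoints have already been downloaded to local files named encoder.pkl and decoder.pkl:

import torch
import torch.nn.functional as F
from dall_e import load_model, map_pixels, unmap_pixels

dev = torch.device('cpu')
enc = load_model('encoder.pkl', dev)   # local copies of the published weights (assumed paths)
dec = load_model('decoder.pkl', dev)

x = map_pixels(torch.rand(1, 3, 256, 256))            # stand-in for a preprocessed image
z = torch.argmax(enc(x), dim=1)                       # (1, 32, 32) token indices
z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
x_rec = unmap_pixels(torch.sigmoid(dec(z)[:, :3]))    # back to an RGB tensor in [0, 1]
print(x_rec.shape)                                    # torch.Size([1, 3, 256, 256])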
It uses the file \"requirements.txt\" to specify installation requirements, and describes it as a PyTorch package for DALL-E's discrete VAE implementation.", "type": "comment" } } \ No newline at end of file diff --git a/docs/data/titles/0.json b/docs/data/titles/0.json new file mode 100644 index 0000000..5b64e80 --- /dev/null +++ b/docs/data/titles/0.json @@ -0,0 +1,25 @@ +{ + "/README.md": "DALL-E's Discrete VAE PyTorch Package", + "/dall_e/__init__.py": "DALL-E Model Loader", + "/dall_e/decoder.py": "DALL-E: Multigroup Convolutional Architecture", + "/dall_e/decoder.py:1-31": "DecoderBlock: Neural Network Module for Convolutions and Identity Paths", + "/dall_e/decoder.py:32-54": "PyTorch Decoder Class: Forward Pass and Residual Paths", + "/dall_e/decoder.py:55-74": "DALL-E Neural Network Initialization", + "/dall_e/decoder.py:75-94": "Encoder-Decoder Model in PyTorch", + "/dall_e/encoder.py": "Dall-E Encoder Blocks and Neural Network Modules", + "/dall_e/encoder.py:1-31": "EncoderBlock: Neural Network Encoder Module", + "/dall_e/encoder.py:32-56": "Residual Convolutional Encoder Class", + "/dall_e/encoder.py:57-77": "Multi-Group Encoder Neural Network", + "/dall_e/encoder.py:77-93": "Convolutional Neural Network Module", + "/dall_e/utils.py": "Conv2d Class and Operations", + "/dall_e/utils.py:1-32": "Conv2D Layer Implementation", + "/dall_e/utils.py:33-59": "Convolution Utilities", + "/model_card.md": "DVAE: Efficient but Fidelity Limited", + "/model_card.md:1-21": "Discrete VAE Model Card for DALL·E", + "/model_card.md:23-41": "Unsuitable for High-Fidelity Image Compression", + "/notebooks/usage.py": "DALL-E Image Processing", + "/notebooks/usage.py:1-46": "Code for Image Processing and Downloads", + "/notebooks/usage.py:47-76": "Reconstruct DALL-E Images", + "/requirements.txt": "Dependent Libraries for Codebase", + "/setup.py": "DALL-E: Discrete VAE PyTorch Package Setup" +} \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 2254d8b..13fdafc 100644 --- a/docs/index.html +++ b/docs/index.html @@ -114,6 +114,52 @@ /* html, body{ max-width: 980px; } */ + /* Add custom styles for the drawer button */ + #drawerButton { + position: absolute; + top: 10px; + right: 10px; + z-index: 1000; + } + + .grayed-out-button { + color: #ccc; + /* Set the text color to gray */ + background-color: #f4f4f4; + /* Set a light gray background color */ + border: 1px solid #ccc; + /* Add a gray border */ + cursor: not-allowed; + /* Change the cursor to indicate the button is disabled */ + pointer-events: none; + /* Disable pointer events to prevent interaction */ + } + + .grayed-out-button i { + color: #ccc; + /* Set the icon color to gray */ + } + + + #drawer { + /* to visualize the alignment of buttons and text */ + background-color: white; + margin: auto; + /* margin: 1.5%; */ + /* margin-right: 10%; */ + text-wrap: wrap; + /* margin-bottom: 10px; */ + } + + #drawer_mask { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.3); + /* background-color: rgba(0, 0, 0, 0.1); */ + } #progress-overlay { position: fixed; @@ -153,6 +199,12 @@ margin-bottom: 10px; } + /* + h3, + h4 { + line-height: 0.5; + } */ + p { line-height: 1.5; } @@ -163,6 +215,7 @@ /* Styles for mobile devices */ @media (max-width: 767px) { + #progress-bar { width: 60%; @@ -189,8 +242,8 @@ .right-half { /*padding-left: 10px;*/ padding-right: 10px; - overflow-y:auto; - overflow-x:auto; + overflow-y: auto; + overflow-x: auto; } .left-half { @@ -207,6 +260,7 @@ } 
#searchInput { + z-index: 10; text-indent: 15px; } @@ -219,11 +273,11 @@ @media (min-width: 768px) { #progress-bar { - width: 50%; } #searchInput { + z-index: 10; text-indent: 20px; } @@ -248,7 +302,7 @@ border: 1px solid #ccc; /* justify-content: space-between; */ /* flex-direction: row; */ - height:min-content; + height: min-content; /* align-items: flex-start; */ } @@ -266,8 +320,8 @@ /* height:100%; */ /* align-self: stretch; */ - overflow-x:auto; - overflow-y:scroll; + overflow-x: auto; + overflow-y: scroll; } .right-half { @@ -303,8 +357,8 @@ /* useless now */ pre { - overflow-x:visible!important; - overflow-y:visible!important; + overflow-x: visible !important; + overflow-y: visible !important; /* overflow-x: auto; */ white-space: pre-wrap; white-space: -moz-pre-wrap; @@ -313,6 +367,10 @@ word-wrap: break-word; } + button { + cursor: pointer; + } + .codelink:hover { cursor: pointer; text-decoration: underline; @@ -375,10 +433,37 @@ border: 1px solid #ccc; } - ul { + .input-group { + position: relative; + display: flex; + /* flex-wrap: wrap; */ + /* align-items: stretch; */ + width: 100%; + justify-content: space-between; + align-items: center; + } + + .input-group-button:hover { + cursor: pointer; + } + + #sidebar-button:hover { + cursor: pointer; + } + + .input-group-button { + border: 1px solid #ccc; + /* padding:auto; */ + /* flex:1; */ + z-index: 9; + padding: 10px; + margin: auto; + } + + ul.search-results { flex: 1; /* Fill the remaining space */ - overflow-y: auto; + overflow-y: scroll; /* Enable vertical scrolling */ list-style: none; padding: 0; @@ -386,452 +471,740 @@ margin-top: 0; } - ul li { + ul.search-results li { background-color: #f2f2f2; margin-bottom: 10px; /*border-radius: 10px;*/ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1); } + + #drawer_content>div>span:hover { + cursor: pointer; + text-decoration: underline; + } -
    - -
    -
    -
    + + + + +
    +
    +
    +
    +
    + + +
    +

    +
    + Document index of: +
    +
    + + +
    +

    +
    + +
    - - -
    -

    Document Index of: - -
    - - -
    -

    - - -
    -
      - -
      + + //document.addEventListener('DOMContentLoaded', registerSearchEventListener); + // waitForDOMContentLoaded(); + registerSearchEventListener() + // console.log("event listener registered") + } + async_main() + + diff --git a/docs/metadata.json b/docs/metadata.json index cc3060f..c774c7c 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -7,46 +7,46 @@ "0": { "filepath": "/README.md", "entry_id": 0, - "language_id": "plain-text" + "language_id": "markdown" }, "1": { - "filepath": "/model_card.md", + "filepath": "/dall_e/__init__.py", "entry_id": 4, - "language_id": "markdown" + "language_id": "python" }, "2": { - "filepath": "/requirements.txt", - "entry_id": 10, - "language_id": "plain-text" + "filepath": "/dall_e/decoder.py", + "entry_id": 8, + "language_id": "python" }, "3": { - "filepath": "/setup.py", - "entry_id": 14, + "filepath": "/dall_e/encoder.py", + "entry_id": 18, "language_id": "python" }, "4": { - "filepath": "/dall_e/__init__.py", - "entry_id": 18, + "filepath": "/dall_e/utils.py", + "entry_id": 28, "language_id": "python" }, "5": { - "filepath": "/dall_e/decoder.py", - "entry_id": 22, - "language_id": "python" + "filepath": "/model_card.md", + "entry_id": 34, + "language_id": "markdown" }, "6": { - "filepath": "/dall_e/encoder.py", - "entry_id": 32, + "filepath": "/notebooks/usage.py", + "entry_id": 40, "language_id": "python" }, "7": { - "filepath": "/dall_e/utils.py", - "entry_id": 42, - "language_id": "python" + "filepath": "/requirements.txt", + "entry_id": 46, + "language_id": "plain-text" }, "8": { - "filepath": "/notebooks/usage.py", - "entry_id": 48, + "filepath": "/setup.py", + "entry_id": 50, "language_id": "python" } }, diff --git a/docs/metadata_title.json b/docs/metadata_title.json new file mode 100644 index 0000000..0103dd2 --- /dev/null +++ b/docs/metadata_title.json @@ -0,0 +1 @@ +{"split_count": 1} \ No newline at end of file diff --git a/docs/tree.html b/docs/tree.html index d79d023..abc5319 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -66,29 +66,32 @@ ul { list-style: none; } + #feeling-lucky:hover{ + cursor: pointer; + }
      -

      Project Structure of: openai/DALL-E

      +

      Project structure of: openai/DALL-E

      • DALL-E DALL-E: AI model, codebase, text to image, PyTorch, requirements.
          -
        • README.md DALL-E's VAE package, no transformer for text to image. Install with pip.
        • dall_e DALL-E: AI model, PyTorch decoder-encoder, neural encoder with residual paths.
            -
          • __init__.py Load DALL-E model from URL or local file.
          • -
          • decoder.py DALL-E: Decoder PyTorch Model, Encoder-Decoder, ConvLayers
          • -
          • encoder.py Encoder neural network blocks with residual paths.
          • -
          • utils.py DallE utilities: Conv2d class, scale, conv, pad.
          • +
          • __init__.py Load DALL-E model from URL or local file.
          • +
          • decoder.py DALL-E: Decoder PyTorch Model, Encoder-Decoder, ConvLayers
          • +
          • encoder.py Encoder neural network blocks with residual paths.
          • +
          • utils.py DallE utilities: Conv2d class, scale, conv, pad.
        • -
        • model_card.md DALL·E's dVAE: Reduces memory, loses fine details.
        • +
        • model_card.md DALL·E's dVAE: Reduces memory, loses fine details.
        • notebooks
            -
          • usage.py Image download, preprocessing, DALL-E model usage and display.
          • +
          • usage.py Image download, preprocessing, DALL-E model usage and display.
        • -
        • requirements.txt Essential packages for codebase.
        • -
        • setup.py Setup Python package "DALL-E" with requirements from "requirements.txt".
        • +
        • README.md DALL-E's VAE package, no transformer for text to image. Install with pip.
        • +
        • requirements.txt Essential packages for codebase.
        • +
        • setup.py Setup Python package "DALL-E" with requirements from "requirements.txt".
      @@ -108,14 +111,27 @@

      Project Structure of: ope const queryParams = getQueryParams(window.location.search); const show_full = queryParams.full == "true"; + //const is_random = queryParams.random == 'true'; + function feelingLucky() { + var elements = document.getElementsByClassName("file_link"); + var randomIndex = Math.floor(Math.random() * elements.length); + window.location = elements[randomIndex].href; + } + //if (is_random) {feelingLucky();} + document.getElementById("feeling-lucky").onclick = feelingLucky; if (!show_full) { const spans = document.querySelectorAll('span'); for (let span of spans) { if (span.getAttribute("hierarchy") == '0') { continue } - toggleVisibility(span); } + } else { + const expand_elem = document.getElementById('expand-tree'); + expand_elem.setAttribute("href", "tree.html"); + expand_elem.setAttribute("title", "Undo expand"); + expand_elem.innerHTML = '' } + From 8e8103434a80b18ebca6a7e9ab683044757c063f Mon Sep 17 00:00:00 2001 From: GitJournal Date: Sat, 13 Jan 2024 20:46:20 +0800 Subject: [PATCH 11/28] update --- docs/codeview.html | 17 ++++++++++++++++- docs/index.html | 34 ++++++++++++++++++++++++++-------- docs/tree.html | 1 + 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index d0da4af..9785df3 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -9,6 +9,7 @@ href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='currentColor' class='bi bi-code-square' viewBox='0 0 16 16'%3E%3Cpath d='M14 1a1 1 0 0 1 1 1v12a1 1 0 0 1-1 1H2a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1zM2 0a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2z'/%3E%3Cpath d='M6.854 4.646a.5.5 0 0 1 0 .708L4.207 8l2.647 2.646a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 0 1 .708 0zm2.292 0a.5.5 0 0 0 0 .708L11.793 8l-2.647 2.646a.5.5 0 0 0 .708.708l3-3a.5.5 0 0 0 0-.708l-3-3a.5.5 0 0 0-.708 0z'/%3E%3C/svg%3E" type="image/svg+xml"> Code View + @@ -405,6 +406,13 @@ const language = queryParams.language; const code_path = queryParams.file; const project_name = queryParams.project; + const keywords = queryParams.keywords; + var keywordList + try { + keywordList = JSON.parse(keywords); + + } catch (e) { } + document.title = `Code view of: ${code_path} - Project: ${project_name}`; const h1_element = document.getElementById('code-path'); // h1_element.textContent = code_path.slice('src/'.length); // debugger; @@ -502,10 +510,17 @@ } else { // Prism.highlightAll(); // Prism.highlightElement(pre_elem); - // setTimeout(() => {pre_elem.classList.add("wrap_pre");}, 1000); // pre_elem.style.whiteSpace="pre-wrap !important"; } + + if (keywordList != undefined) { + setInterval(() => { + const markInstance = new Mark(document.getElementById('code-div')); + markInstance.unmark(); // Clear previous marks + markInstance.mark(keywordList); + }, 1000) + } // applyHash(); // Prism.highlightElement(pre_elem, () => {applyHash()}); // Prism.highlightElement(pre_elem).then(applyHash); diff --git a/docs/index.html b/docs/index.html index 13fdafc..48b7d04 100644 --- a/docs/index.html +++ b/docs/index.html @@ -536,7 +536,7 @@

      -

      @@ -619,7 +619,8 @@

      // Position the target element at the same top and left coordinates as the reference element drawer_title.style.position = "absolute"; - drawer_title.style.top = topPosition + "px"; + drawer_title.style.top = topPosition * 1 + "px"; + // drawer_title.style.top = topPosition*1.5 + "px"; if (pageWidth > 768) { drawer_title.style.left = pageWidth * 0.02 + "px"; // drawer_title.style.left = pageWidth * 0.03 + "px"; @@ -673,13 +674,14 @@

      const progressOverlay = document.getElementById('progress-overlay'); const progressBar = document.querySelector('.progress'); var isDebugMode = false; - function navigateToPage(base_filepath, language_id, project_id, detail_filepath = "") { + function navigateToPage(base_filepath, language_id, project_id, keywords, detail_filepath = "") { // Use a relative path to navigate to a specific page let page_param = "codeview.html"; let file_param = 'src' + base_filepath; file_param = encodeURIComponent(file_param); let language_param = language_id - let jump_link = `${page_param}?file=${file_param}&language=${language_id}&project=${project_id}`; + let keywords_encoded = encodeURIComponent(JSON.stringify(keywords)); + let jump_link = `${page_param}?file=${file_param}&language=${language_id}&project=${project_id}&keywords=${keywords_encoded}`; if (detail_filepath !== "") { let location_range = detail_filepath.slice(base_filepath.length + 1); let location_param = `mycode.${location_range}`; @@ -715,6 +717,8 @@

      const github_url = metadata.url.full; const project_id = metadata.project_name; + const myDefaultTitle = `Document index of: ${project_id}` + document.title = myDefaultTitle; const github_partial_url = metadata.url.partial; const file_mapping = metadata.file_mapping const split_count = metadata.split_count @@ -815,11 +819,16 @@

      return inputString.split(searchValue).join(replaceValue); } function getSubTerms(it) { - var m_str = it + it = it.trim(); + var ret = [it] for (const sym of englishSymbols) { + var m_str = it m_str = replaceAll(m_str, sym, " "); + ret.concat(m_str.split(" ")) } - return m_str.split(" "); + ret = ret.concat(it.split(" ")) + ret = ret.concat(m_str.split(" ")); + return ret } const searchInputElem = document.getElementById('searchInput'); @@ -876,6 +885,7 @@

      // console.log('search term file index:', searchTermFileIndex) if (searchTermFileIndex != -1) { isFileSearch = true; + document.title = `File: ${searchTerm} - ${myDefaultTitle}` // do something with the buttons // file_previous file_next if (searchTermFileIndex != 0) { @@ -954,6 +964,7 @@

      }); }); var isDesktopScreen = window.innerWidth > 768; + var title_topics = [] for (var file_id of searchResultItemIds) { const it = searchResultItems[file_id]; const item = document.createElement('li'); @@ -966,6 +977,7 @@

      const file_title_elem_id = generateUUID() file_title_elem.setAttribute('id', file_title_elem_id) const mylocation = it.header.filepath.slice(1) + title_topics.push(title_data["/" + mylocation]) overview_file_title_elem.innerHTML = `${title_data["/" + mylocation]}`; overview_file_title_elem.onclick = () => { @@ -982,7 +994,8 @@

      file_title_elem.appendChild(file_title_span); // location_p.innerHTML = (mylocation); - file_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)})`) + file_title_span.onclick = () => { navigateToPage(it.header.filepath, it.language_id, project_id, searchHighlightTerms) } + // file_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)},${JSON.stringify(project_id)})`) // location_p.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)})`) // search_header.appendChild(location_p); search_header.appendChild(file_title_elem); @@ -1038,7 +1051,8 @@

      // code_location_p.className = "monospace-text codelink"; // code_location_p.innerHTML = (pair_location_text); code_title_span.setAttribute("title", pair_location_text) - code_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) + code_title_span.onclick = () => { navigateToPage(it.header.filepath, it.language_id, project_id, searchHighlightTerms, pair.location) } + // code_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) // code_location_p.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) // pair_left.appendChild(code_location_p); if (pair_title_text != undefined) { @@ -1081,6 +1095,8 @@

      } Prism.highlightAllUnder(searchResults); + const title_topics_joined = title_topics.join(" | ") + document.title = `${document.title} - Topics: ${title_topics_joined}` // let's try understand that. const markInstance = new Mark(document.getElementById('searchResults')); markInstance.unmark(); // Clear previous marks @@ -1133,9 +1149,11 @@

      if (inputBoxText.trim() !== "") { // Construct the new URL with the updated query string newURL = `${window.location.protocol}//${window.location.host}${window.location.pathname}?q=${inputBoxText}`; + document.title = `Query: ${inputBoxText.trim()} - ${myDefaultTitle}`; } else { newURL = `${window.location.protocol}//${window.location.host}${window.location.pathname}`; + document.title = myDefaultTitle; } diff --git a/docs/tree.html b/docs/tree.html index abc5319..0692daa 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -6,6 +6,7 @@ + Project structure of: openai/DALL-E