-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinference.py
175 lines (145 loc) · 6.49 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import argparse
import os
import pathlib
import pickle
import sys
import yaml
sys.path.append(".")
sys.path.append("CGIC/models/model")
import torch
import requests
import numpy as np
from PIL import Image
from io import BytesIO
from typing import Optional, Tuple, Union
from tqdm import tqdm
from pathlib import Path
import json
from omegaconf import OmegaConf
import torchvision
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from torch.nn.functional import mse_loss, l1_loss
from torch.utils.data import DataLoader, Dataset
from CGIC.models.model import CGIC
from CGIC.tools.indices_coding import HuffmanCoding as HuffmanCoding # Huffman with frequency
from CGIC.tools.mask_coding import BinaryCoding
class ImageDataset(Dataset):
    """Dataset of the image files found recursively under a directory.

    Paths are collected once at construction time and sorted, so indices are
    stable between runs. Each item is opened with PIL, center-cropped so that
    both sides are multiples of 16, and converted with ``T.ToTensor()``.
    """

    # Accepted file extensions (lower-case, including the leading dot).
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    # Both sides of every returned image are cropped to a multiple of this.
    _SIZE_MULTIPLE = 16

    def __init__(
        self,
        imagenet_images_dir: Union[str, Path],
        target_size: int = 512,
        images_range: Optional[Tuple[int, int]] = (0, -1),
    ) -> None:
        """Collect the image paths.

        Args:
            imagenet_images_dir: Root directory that is searched recursively.
            target_size: Stored on the instance; the code in this class does
                not use it for resizing.
            images_range: ``(start, end)`` indices into the sorted path list.
                A non-positive ``end`` means "up to the last image"; ``None``
                selects every image.
        """
        super().__init__()
        self.target_size = target_size
        imagenet_images_dir = Path(imagenet_images_dir)
        image_paths = sorted(
            p for p in imagenet_images_dir.glob('**/*') if self._is_image_path(p)
        )
        self.image_paths = self._select_range(image_paths, images_range)
        print(f'Found {len(self.image_paths)} images to reconstruct')
        self.transform = T.Compose([
            T.ToTensor()
        ])

    def __getitem__(self, index: int) -> torch.Tensor:
        """Load, crop and convert the image at position ``index``."""
        # NOTE(review): the image is not converted to RGB, so grayscale or
        # RGBA files keep their own channel count - confirm the model copes.
        with Image.open(self.image_paths[index]) as image:
            image = self._resize_and_crop(image)
            return self.transform(image)

    def _resize_and_crop(self, img):
        """Center-crop ``img`` so width and height are multiples of 16."""
        w, h = img.size
        multiple = self._SIZE_MULTIPLE
        hn = h // multiple
        wn = w // multiple
        if hn == 0 or wn == 0:
            # A zero-sized crop would only fail later with an obscure error.
            raise ValueError(
                f'Image of size {w}x{h} is smaller than {multiple} pixels '
                'along one side and cannot be cropped'
            )
        return TF.center_crop(img, output_size=[multiple * hn, multiple * wn])

    def __len__(self) -> int:
        """Return the number of selected images."""
        return len(self.image_paths)

    def _is_image_path(self, path: Path) -> bool:
        """Return True for a non-hidden file with a jpg/jpeg/png extension."""
        if path.is_dir() or path.name.startswith('.'):
            return False
        # ``suffix`` is empty for names without a dot, so a file that is
        # literally called "jpg" is no longer mistaken for an image.
        return path.suffix.lower() in self._IMAGE_SUFFIXES

    @staticmethod
    def _select_range(paths, images_range):
        """Return the slice of ``paths`` described by ``images_range``.

        The start index is always honoured so that the selected images stay
        aligned with any numbering a caller derives from that start index.
        """
        if images_range is None:
            return list(paths)
        start, end = images_range
        if end > 0:
            return list(paths[start:end])
        return list(paths[start:])
def load_config(config_path, display=False):
    """Read a configuration file with OmegaConf.

    Args:
        config_path: Location of the configuration file.
        display: When true, the loaded configuration is also printed as YAML.

    Returns:
        The object produced by ``OmegaConf.load``.
    """
    cfg = OmegaConf.load(config_path)
    if not display:
        return cfg
    as_container = OmegaConf.to_container(cfg)
    print(yaml.dump(as_container))
    return cfg
def load_model(config, ckpt_path=None):
    """Build a ``CGIC`` model and optionally restore weights from a checkpoint.

    Args:
        config: Configuration object; ``config.model.params`` is expanded as
            keyword arguments for the ``CGIC`` constructor.
        ckpt_path: Optional checkpoint file. Its ``"state_dict"`` entry is
            loaded non-strictly, so mismatching keys do not raise.

    Returns:
        The model switched to evaluation mode.
    """
    model = CGIC(**config.model.params)
    if ckpt_path is not None:
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        sd = torch.load(ckpt_path, map_location="cpu")["state_dict"]
        missing, unexpected = model.load_state_dict(sd, strict=False)
        print(f"Restored from {ckpt_path}")
        # A non-strict load hides mismatches; report them so a wrong or
        # partial checkpoint is visible instead of silently ignored.
        if missing:
            print(f"Missing keys ({len(missing)}): {missing}")
        if unexpected:
            print(f"Unexpected keys ({len(unexpected)}): {unexpected}")
    return model.eval()
def write_images(
    images: torch.Tensor,
    output_dir: Path,
    i: int,
    batch_size: int,
    start_offset: int,
    bpp: Optional[float] = None,
) -> None:
    """Save every image of a batch as a PNG file.

    Args:
        images: Batch that is permuted with ``(0, 2, 3, 1)`` before saving,
            i.e. the channel axis is expected at position 1. Values are
            scaled by 255, so they are expected in ``[0, 1]``.
        output_dir: Directory the files are written to.
        i: Index of the batch.
        batch_size: Nominal batch size, used to compute a running file index.
        start_offset: Value added to every file index.
        bpp: When given, it is appended to each file name.
    """
    pixels = images.permute(0, 2, 3, 1).detach().cpu().numpy()
    # Round to the nearest level and clip: a plain cast truncates (so 254.9999
    # becomes 254) and wraps around for values outside [0, 1].
    pixels = np.clip(np.rint(pixels * 255.0), 0, 255).astype(np.uint8)
    if pixels.ndim == 4 and pixels.shape[-1] == 1:
        # PIL cannot build an image from an (H, W, 1) array; drop the axis.
        pixels = pixels[..., 0]
    for j, img in enumerate(pixels):
        k = i * batch_size + j + start_offset
        img = Image.fromarray(img)
        if bpp is not None:
            img.save(output_dir / f'{k:03d}_{bpp:05f}.png')
        else:
            img.save(output_dir / f'{k:03d}.png')
def get_parser(**parser_kwargs):
    """Create the command-line parser for the inference script.

    Args:
        **parser_kwargs: Forwarded unchanged to ``argparse.ArgumentParser``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(**parser_kwargs)
    parser.add_argument('-i', '--images_dir', type=str, default='../dataset/Kodak', required=False,
                        help='Path to the root directory where the images are')
    parser.add_argument('-b', '--batch_size', type=int, default=1,
                        help='Number of images in a minibatch')
    parser.add_argument('-n', '--num_workers', type=int, default=1,
                        help='Number of worker processes to load the images')
    parser.add_argument('-s', '--image_size', type=int, default=512,
                        help='Size of the reconstructed image')
    parser.add_argument('-o', '--output_dir', type=str, default='./output',
                        help='Path to a directory where the outputs will be saved')
    # The original flag name is misspelled. It is kept, together with its
    # attribute name, for backward compatibility; the correctly spelled
    # alias stores into the same attribute.
    parser.add_argument('-w', '--write_partiton_map', '--write_partition_map',
                        dest='write_partiton_map', action='store_true',
                        help='If set, the partition maps will be saved to the output directory')
    parser.add_argument('-r', '--images_range', type=int, nargs=2, default=(0, -1),
                        help=('Optional. To use, provide two values indicating the starting and ending indices of'
                              ' the images to be loaded. Only images within this range will be loaded. Useful for'
                              ' manually sharding the dataset if running all images at once would take too much'
                              ' time.'))
    return parser
def main():
    """Compress every selected image and save the reconstructions.

    Reads the command-line options, builds the dataset and the model from
    ``./configs/config_inference.yaml``, runs ``model.compress`` on each batch,
    writes the reconstructed images (and, on request, the partition maps) and
    appends the per-batch and average bpp values to ``bpp.txt`` in the output
    directory.
    """
    parser = get_parser()
    # Unrecognised command-line arguments are tolerated and ignored.
    opt, _ = parser.parse_known_args()

    dataset = ImageDataset(opt.images_dir, opt.image_size, opt.images_range)
    dataloader = DataLoader(dataset, opt.batch_size, num_workers=opt.num_workers)

    # Use one device for both the model and the inputs; previously the model
    # was always moved to CUDA while the inputs were moved only if available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    config = load_config("./configs/config_inference.yaml", display=True)
    model = load_model(config).to(device)

    frequency = model.quantize.embedding_counter
    h_string = HuffmanCoding(frequency)
    h_mask = BinaryCoding()

    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params (M): %.2f' % (n_parameters / 1.e6))

    output_dir = Path(opt.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    rec_output_dir = output_dir / 'reconstructed'
    rec_output_dir.mkdir(parents=True, exist_ok=True)

    save_img = False
    if opt.write_partiton_map:
        partition_output_dir = output_dir / 'partition_map'
        partition_output_dir.mkdir(parents=True, exist_ok=True)
        save_img = True

    bpp_sum = 0.
    # Append mode: results of earlier runs in the same directory are kept.
    with open(output_dir / 'bpp.txt', 'a') as f:
        for i, x in enumerate(tqdm(dataloader)):
            x = x.to(device)
            with torch.no_grad():
                x_rec, bpp, partition_map = model.compress(x, opt.output_dir, h_string, h_mask, save_img)
            x_rec = x_rec.clamp(0, 1)
            write_images(x_rec, rec_output_dir, i, opt.batch_size, opt.images_range[0], bpp=bpp)
            if opt.write_partiton_map:
                write_images(partition_map, partition_output_dir, i, opt.batch_size, opt.images_range[0], None)
            bpp_sum += bpp
            f.write(f'image: {i} \t bpp: {bpp}\n')

        if len(dataset) == 0:
            # Avoid a ZeroDivisionError when no image matched.
            print('No images found, nothing to average')
            return

        # NOTE(review): one bpp value is accumulated per batch but the sum is
        # divided by the number of images; the two agree only for the default
        # batch size of 1 - confirm what ``model.compress`` returns.
        bpp_average = bpp_sum / len(dataset)
        # The trailing newline keeps the next appended run on its own line.
        f.write(f'Bpp Average: {bpp_average}\n')
        print(f'Bpp Average: {bpp_average}')
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()