Tile-Based Semantic Segmentation of Coffee Leaves¶
In this notebook, I want to dive deep into the semantic segmentation modeling used to detect defects on coffee leaves. Strictly speaking, the methodology I used is not "semantic segmentation" as modern deep learning models compute it; it is simple classification using a tile-based approach. I will still refer to it as semantic segmentation, since it produces the same output (just not as efficiently).
Continuing from my previous notebook, Instance Segmentation with Mask R-CNN on Coffee Leaves, we take the output of the Mask R-CNN model (the produced contour of each leaf) and classify each pixel as either "defective" or "healthy".
Why would we do this and not just directly classify the whole image? A two-layered approach is inefficient, but perhaps quite necessary with this specific dataset. In the previous notebook, I listed several references to techniques that "guided" a model to better detect diseases on leaves; you can review them in the Citations section at the end of this notebook.
Intuitively, this makes sense. CNNs aren't really good at "contextual" understanding. If a skilled researcher or farmer took a look at a picture of a coffee tree, they would have the "context" that the grass on the ground, or the sky in the background, is not part of a coffee leaf (which is where the disease is going to be). A CNN model doesn't have that. But it is very good at detecting patterns. So good, that sometimes it's quite spooky.
A great reference on the explainability of CNNs can be found here: Feature Visualization: How neural networks build up their understanding of images.
Data Annotation¶
The main reason for using this "tile-based" semantic segmentation technique is the data annotation effort. Most modern approaches rely on painting a section of an image with a specific color (or label) to indicate that these pixels are different from other pixels. But this is very time consuming. Considering the highly contextual dataset we are working with (even after "focusing" in on the leaves with the Mask R-CNN model), including but not limited to: lighting conditions, leaves of various sizes, shapes, and ages, various defects, and various diseases, annotating each pixel in the traditional way while capturing all of these variations would take an enormous amount of time.
import cv2
import matplotlib.pyplot as plt
frame = cv2.imread('../images/mask_samples.jpg')
plt.figure(figsize=(10, 10))
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
However, if we can develop a tool that allows us to simply select a tile (assume a square) and classify the entire region as "defective" or "healthy", then we can (quite literally) reduce the annotation time by a factor of the square of the tile's side length. With a 64x64 tile, we can classify 4096 pixels in one click. Amazing!
But before fully committing to this approach, we need to understand its limitations. Tile-based approaches have been well tested and experimented with. In modern applications, they are used on very high resolution images, like satellite or drone imagery. They are mainly used due to memory limitations, as the whole of a geographical survey cannot fit into memory. For smaller images, they are not widely used, as memory is not a constraint and architectures (like Mask R-CNN or U-Net) are well studied and optimized for the task. But, at either high or low resolution, the two biggest drawbacks are increased computation time and the loss of contextual information.
For a tile-based approach to work, and work accurately, the tiles must overlap. If we classify a 64x64 tile as "defective" or "healthy", it may only contain a small portion of "defective" or "healthy" pixels. So, using a set stride, we move our tile region over the image and classify the next overlapping tile. This overlap lets us capture context from the surrounding tiles and effectively "blend" the classification results together (we will get into this later). However, it obviously increases computation time, as we classify the same pixels multiple times over.
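To put a rough number on that cost, here is a minimal sketch (the 512x512 image size is just an assumption for illustration) counting how many tiles a full sweep produces at different strides:

def tile_count(size, t=64, stride=8):
    # number of window positions along one axis of a square image
    steps = (size - t) // stride + 1
    return steps * steps

# a hypothetical 512x512 leaf mask
for stride in (64, 32, 16, 8):
    print(f'stride={stride}: {tile_count(512, stride=stride)} tiles')
# stride=64: 64 tiles ... stride=8: 3249 tiles, roughly 50x the inference work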
The loss of contextual information is also a bit obvious. Since we only feed in 64x64 pixels, can we really say whether a leaf is "defective" or "healthy"? If we feed in an entire image and tile over it, we have already shown that the model will not be able to distinguish between the leaf and the background (covered in the previous notebook). Considering the diversity of the dataset, a branch or shadow could easily produce a false positive.
The major caveat with our approach is that I'm already confident a 64x64 tile, limited to the leaf region, should be able to classify a defective region. Most diseases and defects are small and don't cover the entire leaf. And even when they do cover the entire leaf, they have very specific patterns that are distinguishable within a 64x64 pixel region.
For more insight into tile-based approaches and their limitations, I highly recommend reading the tiling papers listed in the Citations section; the last of them is a set of lecture notes with great visualizations.
frame = cv2.imread('../images/patches.png')
plt.figure(figsize=(20, 10))
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
With a quick OpenCV script, I was able to annotate a very large tiled dataset in a matter of a few hours. In the image above, the top row contains "defective" tiles whereas the bottom row contains "healthy" tiles. A lot of tiles also contain "black" backgrounds, as this is the result of masking the leaf region from the Mask R-CNN output. This is an undesirable effect, but hopefully a feature the model can learn to ignore (we will explore this later).
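The annotation script itself isn't shown here, but a minimal sketch of the idea looks something like this (the window name, key binding, and click-to-label scheme are my own assumptions, not the actual tool):

import cv2

def annotate_tiles(image, t=64):
    """Left click marks a tile as defective, right click as healthy."""
    annotations = []  # (x1, y1, x2, y2, defective)
    def on_click(event, x, y, flags, param):
        # snap the click to the top-left corner of the surrounding t x t tile
        x1, y1 = (x // t) * t, (y // t) * t
        if event == cv2.EVENT_LBUTTONDOWN:
            annotations.append((x1, y1, x1 + t, y1 + t, 1))
        elif event == cv2.EVENT_RBUTTONDOWN:
            annotations.append((x1, y1, x1 + t, y1 + t, 0))
    cv2.namedWindow('annotate')
    cv2.setMouseCallback('annotate', on_click)
    while True:
        cv2.imshow('annotate', image)
        if cv2.waitKey(50) == ord('q'):  # press q to finish
            break
    cv2.destroyAllWindows()
    return annotations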
import os
import boto3
import deltalake as dl
os.environ['AWS_EC2_METADATA_DISABLED'] = 'true'
session = boto3.Session(profile_name='default')
credentials = session.get_credentials()
credentials = credentials.get_frozen_credentials()
storage_options = {
    'AWS_REGION': 'us-west-1',
    'AWS_ACCESS_KEY_ID': credentials.access_key,
    'AWS_SECRET_ACCESS_KEY': credentials.secret_key,
    'AWS_S3_ALLOW_UNSAFE_RENAME': 'true'
}
patch_annos = dl.DeltaTable(
    table_uri='s3a://coffee-dataset/lake/clear_leaf_patch_annotations',
    storage_options=storage_options
).to_pandas()
patch_annos
| | image_path | hash | defective | patch |
| --- | --- | --- | --- | --- |
| 0 | coffee-dataset/raw_images/bay_view_dead_leaves... | ec2785912ccfc157520dc7e44985fbd6 | 1 | [230, 77, 294, 141] |
| 1 | coffee-dataset/raw_images/bay_view_dead_leaves... | ec2785912ccfc157520dc7e44985fbd6 | 1 | [179, 76, 243, 140] |
| 2 | coffee-dataset/raw_images/bay_view_dead_leaves... | ec2785912ccfc157520dc7e44985fbd6 | 1 | [115, 75, 179, 139] |
| 3 | coffee-dataset/raw_images/bay_view_dead_leaves... | ec2785912ccfc157520dc7e44985fbd6 | 1 | [62, 72, 126, 136] |
| 4 | coffee-dataset/raw_images/bay_view_dead_leaves... | ec2785912ccfc157520dc7e44985fbd6 | 1 | [99, 98, 163, 162] |
| ... | ... | ... | ... | ... |
| 13341 | coffee-dataset/raw_images/milolii_luis_farm/20... | d36ae6fcc22a404233638f984e7b39b7 | 0 | [500, 382, 564, 446] |
| 13342 | coffee-dataset/raw_images/milolii_luis_farm/20... | d36ae6fcc22a404233638f984e7b39b7 | 0 | [527, 304, 591, 368] |
| 13343 | coffee-dataset/raw_images/milolii_luis_farm/20... | d36ae6fcc22a404233638f984e7b39b7 | 0 | [600, 249, 664, 313] |
| 13344 | coffee-dataset/raw_images/milolii_luis_farm/20... | d36ae6fcc22a404233638f984e7b39b7 | 0 | [250, 102, 314, 166] |
| 13345 | coffee-dataset/raw_images/milolii_luis_farm/20... | d36ae6fcc22a404233638f984e7b39b7 | 0 | [175, 66, 239, 130] |

13346 rows × 4 columns
unique_images = patch_annos['image_path'].unique()
healthy_patches = patch_annos[patch_annos['defective'] == 0]
defective_patches = patch_annos[patch_annos['defective'] == 1]
print(f'Number of unique images: {len(unique_images)}')
print(f'Number of healthy patches: {len(healthy_patches)}')
print(f'Number of defective patches: {len(defective_patches)}')
Number of unique images: 192
Number of healthy patches: 8270
Number of defective patches: 5076
Now this dataset needs to be filtered a little bit (it was easy to get click-happy and annotate a lot of tiles). To remove undesired tiles, I used a simple filter: if a tile is not contained at least 50% within the leaf region, it is removed. Additionally, I check for all-black tiles and tiles that somehow got malformed (like a 64x64 tile that is only 10x64).
import numpy as np
from shapely.geometry import Polygon

t = 64
# `contour` and `mask` come from the Mask R-CNN output for a single leaf,
# and `patches` are the annotated tile boxes for that image.
polygon = Polygon(contour)
for patch in patches:
    x1, y1, x2, y2 = patch
    box = Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])
    # keep the tile only if at least half of it overlaps the leaf region
    greater_than_half = box.intersection(polygon).area / box.area >= 0.5
    inside_polygon = polygon.contains(box)
    if greater_than_half or inside_polygon:
        tile = mask[y1:y2, x1:x2]
        # if either side is less than t//2, the tile is malformed; skip it
        if tile.shape[0] < t // 2 or tile.shape[1] < t // 2:
            continue
        # if the tile is all black, skip it
        if np.all(tile == 0):
            continue
        # pad partial tiles on the bottom/right to a full t x t region
        pads = ((0, t - tile.shape[0]), (0, t - tile.shape[1]), (0, 0))
        tile = np.pad(tile, pads, 'constant', constant_values=(0, 0))
        # save the tile to disk!
Data Preprocessing And Augmentation¶
Now that we have our dataset, we need to consider its balance. There are a lot more healthy regions than defective regions. This is expected, as we are trying to detect an unwanted feature (most leaves are very healthy). Since it is quite easy to gather more data, we don't have to worry about generating synthetic data or using SMOTE techniques. We can simply balance the dataset by removing some healthy tiles.
import numpy as np
import pandas as pd
from sklearn.utils import resample

df_majority = df[df['defective'] == 1]
df_minority = df[df['defective'] == 0]
if len(df_majority) < len(df_minority):
    df_majority, df_minority = df_minority, df_majority
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=seed
)
# combine the downsampled majority class with the minority class
df = pd.concat([df_majority_downsampled, df_minority])
# set column 'sett' to be training, validation, or test
df['sett'] = np.random.choice(['train', 'val', 'test'], size=len(df), p=[0.7, 0.1, 0.2])
Data augmentation is a more interesting challenge.
In another notebook, I utilized very simple color-based features to detect whether a leaf was defective or not: Exploring Different Algorithms of Dominant Color Extraction. It was incredibly successful: I was able to classify leaves with 90% accuracy using simple logistic regression. Given that the decision boundary is quite distinguishable on a linear scale with only colors, it's easy to transfer that intuition into our data augmentation strategy.
We shouldn't adjust the hue of the leaf, as this is a very defining feature of defects.
Defects take on various shapes and sizes, yes, but one of their most defining characteristics is color. So, in our augmentation, we will only adjust the brightness, contrast, and saturation. Throw in a bit of sharpness adjustment and some horizontal/vertical flips, and we have a very robust data augmentation strategy that will likely cover the variations of defects.
from torchvision.transforms import v2

transform = v2.Compose([
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomAdjustSharpness(2.0),
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.0),
])
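As a quick sanity check, here is a minimal sketch applying the transform to a single saved tile (the file path is a placeholder):

from torchvision.io import read_image

tile = read_image('tiles/defective_0001.png')  # uint8 tensor of shape (3, 64, 64)
augmented = transform(tile)
assert augmented.shape == tile.shape  # augmentation changes appearance, not geometry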
Modeling¶
Now for the most exciting, but short-lived, part: modeling! As with most computer vision applications, it's best to try well-established models and build off of them. They usually have very good performance, are well studied by the community, are easy to implement, and usually provide weights pretrained on ImageNet or COCO for transfer learning. For our task, we assume the difference between "defective" and "healthy" patches is highly separable, so we don't need a very deep model. We also want something that is very fast to train and run inference with, because we already know that tiling is going to be computationally expensive.
Some good models to try are ResNet-18, EfficientNet, and MobileNet, as they are specifically optimized for speed (having fewer parameters than other flavors within their respective families). In PyTorch, it's quite easy to load these models and modify them to fit our task.
In addition to this, since I already know that dominant colors have a huge influence on the classification of healthy and defective leaves, I also trained classical machine learning algorithms to see how well they would perform on this task. Using an ensemble of XGBoost, with Naive Bayes to adjust the probability threshold, I was able to achieve 80% accuracy on the test set. Quite impressive! But more on this later.
import torch.nn as nn
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

class PatchModel(nn.Module):
    def __init__(self):
        super(PatchModel, self).__init__()
        # Load the EfficientNet-B0 model. Its default weights are used for transfer learning.
        self.model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
        # Freeze all the layers to increase the speed of training.
        for param in self.model.parameters():
            param.requires_grad = False
        # Unfreeze the last two feature blocks and the classifier layer.
        # These layers will be optimized on our dataset during training.
        for param in self.model.features[-2:].parameters():
            param.requires_grad = True
        for param in self.model.classifier.parameters():
            param.requires_grad = True
        # Modify the classifier layer to output a single value.
        # I also increase the dropout rate to 0.5 to prevent overfitting.
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(1280, 1)
        )
        # Move this model to the GPU!
        self.model = self.model.cuda().float()

    def forward(self, x):
        return self.model(x)
We can now set up our training loop. Since this is a two-class problem, we can use binary cross entropy loss. Further, applying a sigmoid activation function to the output of the classifier allows us to interpret the output as a probability. A super awesome description of the most often used activation functions (softmax and sigmoid) can be found here: Sigmoid and SoftMax Functions in 5 minutes.
The output of our model's classifier is actually a scalar logit whose range is unbounded, spanning negative to positive numbers. It can be interpreted as a probability by passing it through the sigmoid function, which is defined by:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
where the output is squashed into the interval between 0 and 1 (exclusive at the ends): effectively a probability!
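The BCEWithLogitsLoss used below folds this sigmoid into the loss itself for numerical stability. A minimal sketch to convince yourself the two formulations agree:

import torch
import torch.nn as nn

logits = torch.tensor([-2.0, 0.5, 3.0])
labels = torch.tensor([0.0, 1.0, 1.0])
# BCE applied to sigmoid-activated outputs...
manual = nn.BCELoss()(torch.sigmoid(logits), labels)
# ...matches BCE computed directly on the raw logits
fused = nn.BCEWithLogitsLoss()(logits, labels)
assert torch.allclose(manual, fused)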
We can trust in the Adam optimizer: a well-established optimizer that is quite robust and requires very little tuning. We can also use a learning rate scheduler to reduce the learning rate as the model converges. This is a very common technique to ensure that the model converges to a local minimum without overshooting it. As the model gets closer to a minimum, the learning rate decreases, keeping the model from "jumping around" the minimum, which is a common problem with gradient descent in neural networks.
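As a quick illustration, with ExponentialLR the learning rate after n epochs is simply lr * gamma**n, so over our 60 epochs the schedule roughly halves the step size:

lr, gamma = 0.0008, 0.99
for n in (0, 30, 60):
    print(f'epoch {n}: lr = {lr * gamma**n:.6f}')
# epoch 0: lr = 0.000800
# epoch 30: lr = 0.000592
# epoch 60: lr = 0.000438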
Finally, since this is a classification problem, we'd also like to be able to track the ROC-AUC score (it's a mouthful: the area under the receiver operating characteristic curve). This is a very common metric in binary classification problems, as it gives insight into the model's ability to distinguish between the two classes. Google has a great article with awesome visualizations of the ROC curve: Classification: ROC and AUC. The torchmetrics library also has incredible documentation on this: TorchMetrics AUROC.
import torch as t
import torch.nn as nn
from torchmetrics import AUROC

epochs = 60
learning_rate = 0.0008
model = PatchModel()
optimizer = t.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = t.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
loss_fn = nn.BCEWithLogitsLoss()
scaler = t.cuda.amp.GradScaler()

with t.autograd.set_detect_anomaly(False):
    for epoch in range(epochs):
        model.train()
        train_auc = AUROC(task='binary')
        running_loss = 0.0
        for masks, labels in train_dataset:
            masks, labels = masks.cuda(), labels.cuda()
            optimizer.zero_grad()
            # mixed-precision forward pass
            with t.cuda.amp.autocast():
                output = model(masks).squeeze()
                labels = labels.float()
                loss = loss_fn(output, labels)
                train_auc.update(output, labels)
            # scaled backward pass and optimizer step
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            masks, labels = masks.cpu(), labels.cpu()
            # .item() detaches the scalar so the graph isn't kept around
            running_loss += loss.item()
        scheduler.step()
        train_auc = train_auc.compute()
        epoch_loss = running_loss / (len(train_dataset.dataset) / train_dataset.batch_size)
Similar code is used for the validation and test loops (without the backpropagation step).
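For completeness, a minimal sketch of what that validation loop could look like (assuming a val_dataset loader that mirrors train_dataset):

model.eval()
val_auc = AUROC(task='binary')
val_loss = 0.0
with t.no_grad():  # no gradients are needed for evaluation
    for masks, labels in val_dataset:
        masks, labels = masks.cuda(), labels.cuda().float()
        output = model(masks).squeeze()
        val_loss += loss_fn(output, labels).item()
        val_auc.update(output, labels)
print(f'val loss: {val_loss / len(val_dataset):.4f}, val AUC: {val_auc.compute():.4f}')

With that in place, we can observe the model's performance.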
plt.figure(figsize=(20, 5))
plt.subplot(1, 2, 1)
plt.imshow(cv2.cvtColor(cv2.imread('../images/kina_metrics/kina_torch_auc.png'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(cv2.cvtColor(cv2.imread('../images/kina_metrics/kina_torch_loss.png'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
Very exciting! Breaking down the graphs, we can see that the training AUC converges within the first 10 epochs. The validation AUC is quite a bit lower, and only increases slightly, reaching about 95% around epoch 25. The loss is also interesting, as the validation loss never seems to decrease. This could indicate the model is overfitting to the training data. But with a final test AUC of around 95%, we can't argue with the results too much. And looking at the confusion matrix, we can see that the model is quite balanced in its predictions.
plt.figure(figsize=(10, 10))
plt.imshow(cv2.cvtColor(cv2.imread('../images/kina_metrics/kina_torch_confusion_matrix.png'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
Already we have an incredibly powerful model. Let's glance at the performance of the XGBoost model on dominant colors and see how it compares.
plt.figure(figsize=(20, 10))
plt.imshow(cv2.cvtColor(cv2.imread('../images/kina_metrics/color_xgb_metrics.png'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
Still a remarkably impressive model on its own, this one was trained on the top 64 dominant colors of each tile (essentially reducing 4096 pixels of color down to 64), using a function like the one below. With an F1 score of 0.84, it's quite impressive. The real value of this comparison is showing how well a model can perform on a small subset of features. It suggests a similar, simpler model exists that could perform even better (given that our CNN seemed to overfit while a simpler model performs quite well). Adding more regularization, or even constructing our own CNN from scratch, could improve performance.
import numpy as np
from PIL import Image

def dominant_colors(image_path):
    image = Image.open(image_path)
    # quantize the tile down to an adaptive palette of 64 colors
    paletted = image.convert('P', palette=Image.ADAPTIVE, colors=64)
    palette = paletted.getpalette()
    color_idxs = paletted.getcolors()
    colors = np.array([palette[idx*3:idx*3+3] for _, idx in color_idxs]) / 255
    # sort colors by vector magnitude for a consistent ordering
    colors = colors[np.argsort(np.linalg.norm(colors, axis=1))]
    colors = colors.flatten().tolist()
    # pad to a fixed length of 64 colors * 3 channels = 192 features
    colors += [0] * (192 - len(colors))
    return colors
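To sketch how these color features feed the classical model (the tile_path column and the hyperparameters here are my own assumptions, and the Naive Bayes threshold adjustment from the ensemble is omitted for brevity):

import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import f1_score

# build a 192-dimensional dominant-color feature vector for each tile
X = np.array([dominant_colors(p) for p in df['tile_path']])
y = df['defective'].values
train_idx = (df['sett'] == 'train').values
test_idx = (df['sett'] == 'test').values
clf = XGBClassifier(n_estimators=200, max_depth=4)
clf.fit(X[train_idx], y[train_idx])
print('F1:', f1_score(y[test_idx], clf.predict(X[test_idx])))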
Tiling¶
Okay, we have discussed a lot about the dataset and modeling for classification. But what does this have to do with semantic segmentation? For that, we need to go back to the tiling process. Since we assume we will have a leaf mask from the Mask R-CNN model, we simply slide a static 64x64 tile across the mask at a set stride and combine the results! Simple, right? The math for how we "blend" these predictions together is a bit more involved.
Our tiling function itself is pretty straightforward though.
def extract_patches(mask, t=64, stride=8):
    coords, tiles = [], []
    # slide a t x t window over the mask at the given stride
    for x in range(0, mask.shape[1] - t, stride):
        for y in range(0, mask.shape[0] - t, stride):
            tile = mask[y:y+t, x:x+t]
            coords.append([x, y])
            tiles.append(tile)
    return coords, tiles

coords, tiles = extract_patches(mask)
# `t` below is the torch alias imported earlier, not the tile size
tiles = t.stack([t.tensor(tile, dtype=t.float32).permute(2, 0, 1) / 255.0 for tile in tiles])
predictions = t.sigmoid(model(tiles))
To blend these predictions together, we have to make sure they are normalized by the number of overlapping tiles at each pixel. On a 1D vector, we can visualize the blending process with the code below:
import numpy as np
t = 3
confidence = 0.5
# we are assuming a stride of 1 here.
# these are the probabilities of each pixel
# that we get from our fake model.
tiles = np.array([
    [0.1, 0.4, 0.7],
    [0.4, 0.7, 0.9],
    [0.7, 0.9, 0.2],
    [0.9, 0.2, 0.1],
    [0.2, 0.1, 0.2]
], dtype=np.float32)
mask = np.array([0, 0, 0, 0, 0, 0, 0], dtype=np.float32)
norm = np.array([0, 0, 0, 0, 0, 0, 0], dtype=np.float32)
for i in range(len(tiles)):
    p = tiles[i]
    # add the probabilities to the mask
    mask[i:i+t] += p
    # add the number of times that pixel has been added to the mask
    norm[i:i+t] += 1
# divide the mask by the number of times each pixel has been added to the mask
heatmap = mask / norm
# threshold the heatmap to get the segmentation
seg = (heatmap > confidence).astype(np.uint8)
mask_plot = (mask / mask.max() * 255).astype(np.uint8).reshape(1, -1)
norm_plot = (norm / norm.max() * 255).astype(np.uint8).reshape(1, -1)
heat_plot = (heatmap * 255).astype(np.uint8).reshape(1, -1)
seg_plot = (seg * 255).astype(np.uint8).reshape(1, -1)
plt.figure(figsize=(20, 10))
plt.subplot(1, 4, 1)
plt.imshow(mask_plot, cmap='Blues')
plt.xticks([]), plt.yticks([])
plt.title('Mask')
plt.subplot(1, 4, 2)
plt.imshow(norm_plot, cmap='Blues')
plt.xticks([]), plt.yticks([])
plt.title('Norm')
plt.subplot(1, 4, 3)
plt.imshow(heat_plot, cmap='Blues')
plt.xticks([]), plt.yticks([])
plt.title('Heatmap')
plt.subplot(1, 4, 4)
plt.imshow(seg_plot, cmap='Blues')
plt.xticks([]), plt.yticks([])
plt.title('Segmentation')
plt.show()
We essentially do the following:
Sum up the probabilities of each pixel into an empty vector
Sum up the number of times a pixel has been classified into an empty vector
Divide the sum of probabilities by the sum of counts
Threshold with whatever confidence we want to get the semantic segmentation mask
Dividing the sum of probabilities by the sum of counts is also known as taking the mean!
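In symbols, for each pixel $x$, if $T(x)$ is the set of tiles covering it and $p_i(x)$ is the probability tile $i$ assigns to it, the heatmap and segmentation are:

$$H(x) = \frac{1}{|T(x)|} \sum_{i \in T(x)} p_i(x), \qquad S(x) = \begin{cases} 1 & \text{if } H(x) > c \\ 0 & \text{otherwise} \end{cases}$$

where $c$ is the confidence threshold.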
In two dimensions, this is just as simple. And a quirky library has been developed to do this for us: pytorch_toolbelt (PyTorch extensions for fast R&D prototyping and Kaggle farming). It combines the blending logic while also adding torch CUDA support for GPU acceleration.
import torch
import numpy as np
from pytorch_toolbelt.inference.tiles import ImageSlicer, TileMerger
from pytorch_toolbelt.utils.torch_utils import to_numpy

t = 64
stride = 16
confidence = 0.5
tiler = ImageSlicer(mask.shape, tile_size=(t, t), tile_step=(stride, stride))
tiles = torch.stack([torch.tensor(tile, dtype=torch.float32).permute(2, 0, 1) / 255.0 for tile in tiler.split(mask)])
tiles = tiles.cuda()
merger = TileMerger(tiler.target_shape, 1, tiler.weight, device='cuda')
predictions = torch.sigmoid(model(tiles))
# perform the integration we saw above
merger.integrate_batch(predictions, tiler.crops)
# do the division! We have a heatmap now.
heatmap = np.moveaxis(to_numpy(merger.merge()), 0, -1)
# Our binary segmentation is just a thresholded version of the heatmap.
segmentation = (heatmap > confidence).astype(np.uint8)
Now, visualizing this, we can get a better understanding of the effect of varying the confidence threshold.
plt.figure(figsize=(20, 10))
plt.imshow(cv2.cvtColor(cv2.imread('../images/kina_threshold_comparison.png'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
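A minimal sketch of how a comparison like this could be generated from the heatmap computed above (the threshold values are arbitrary):

import numpy as np
import matplotlib.pyplot as plt

thresholds = (0.3, 0.5, 0.7, 0.9)
plt.figure(figsize=(20, 5))
for i, c in enumerate(thresholds):
    # re-threshold the same heatmap at increasing confidence levels
    plt.subplot(1, len(thresholds), i + 1)
    plt.imshow((heatmap > c).astype(np.uint8).squeeze(), cmap='Blues')
    plt.title(f'confidence > {c}')
    plt.axis('off')
plt.show()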
And we finally have it! A fully working tile-based semantic segmentation model that can detect defects on coffee leaves.
plt.figure(figsize=(20, 10))
plt.imshow(cv2.cvtColor(cv2.imread('../images/example_inferenced.jpg'), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
Conclusion¶
In this notebook, I have outlined a tile-based semantic segmentation method used to detect defects on coffee leaves. Given the large time cost of annotating data, this methodology proved to be a faster path to an MVP than the traditional approach to annotation. Although inference is slow, the model is able to achieve a 95% AUC on the test set.
Further, this model could actually be used to generate a dataset for a more traditional semantic segmentation model. Using our very accurate model, we could run inference to generate masks for many leaves and feed them to a U-Net model. It would be a very interesting experiment to see how the two models compare.
I hope you enjoyed this notebook and learned something new.
Citations¶
Paulos, Eyobed & Woldeyohannis, Michael. (2022). Detection and Classification of Coffee Leaf Disease using Deep Learning. 1-6. 10.1109/ICT4DA56482.2022.9971300.
Yebasse, Milkisa et al. “Coffee Disease Visualization and Classification.” Plants (Basel, Switzerland) vol. 10,6 1257. 21 Jun. 2021, doi:10.3390/plants10061257
Olah, et al., "Feature Visualization", Distill, 2017. https://distill.pub/2017/feature-visualization/
B. Huang, et al., "Tiling and Stitching Segmentation Output for Remote Sensing: Basic Challenges and Recommendations", arXiv, 2018. https://arxiv.org/pdf/1805.12219
M. Niemeyer, "Insights into the Effects of Tile Size and Tile Overlap Levels on Semantic Segmentation Models Trained for Road Surface Area Extraction from Aerial Orthophotography", Remote Sensing, 2024. https://www.mdpi.com/2072-4292/16/16/2954
Fei-Fei, L., et al., "Object Detection and Image Segmentation", Stanford, 2022. https://cs231n.stanford.edu/slides/2022/lecture_9_jiajun.pdf
Tan, Mingxing, and Quoc Le. "Efficientnet: Rethinking model scaling for convolutional neural networks." International conference on machine learning. PMLR, 2019.
He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
Gabriel Furnieles, "Sigmoid and SoftMax Functions in 5 minutes", Towards Data Science, 2022. https://towardsdatascience.com/sigmoid-and-softmax-functions-in-5-minutes-f516c80ea1f9
Google, "Classification: ROC and AUC", 2022. https://developers.google.com/machine-learning/crash-course/classification/roc-and-auc