Source_code.py
# Required libraries
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import random

# Load the pre-trained BLIP model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
def add_aesthetic_flair(description):
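    """
    Turn a plain image description into an Instagram-style caption by adding a
    keyword-matched (or random) aesthetic tone, appending hashtags, and capping
    the result at 2,200 characters.
    """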
    # Keywords to identify specific styles for the caption
    nature_keywords = ["mountain", "sea", "sky", "ocean", "nature", "forest", "beach", "trees", "jungle", "wildlife", "wilderness", "river", "canyon", "hills", "valley", "waterfall", "lake"]
    city_keywords = ["city", "urban", "street", "lights", "buildings", "skyscrapers", "cityscape", "skyline", "construction", "metropolis", "traffic", "subway", "metro", "plaza", "market", "billboards", "nightlife", "tower"]
    people_keywords = ["person", "people", "portrait", "smiling", "group", "friends", "family", "crowd", "community", "society", "gathering", "celebration", "adults", "children", "elderly", "men", "women", "employees", "volunteer", "friendship"]
    # Defining a list of possible emotional or aesthetic phrases
    aesthetic_tones = [
        "where time stands still",
        "capturing the essence of life",
        "a perfect moment frozen in time",
        "an ode to beauty",
        "an unforgettable experience",
        "whispers of serenity",
        "an adventure in every frame",
        "a snapshot of pure bliss",
        "faded photo of a classic diner",
        "single plant in a white pot",
        "cozy living room with woven rugs",
        "neon lights outside an arcade",
        "black-and-white portrait in soft light",
        "couple walking through a flower-filled park",
        "dimly lit room with candlelight",
        "graffiti-covered walls in an abandoned warehouse",
        "sleek kitchen with exposed brick",
        "woman on a rooftop at sunset",
        "soft pink bedroom with mint green accents",
        "neon-lit city skyline at night",
        "mist rolling through a quiet forest",
        "wooden cabin with a roaring fireplace",
        "floating island with waterfalls",
        "busy street with neon signs and traffic",
        "dew-covered leaves in the forest",
        "swirl of colors and shapes in motion",
        "balloons flying in a sunny park",
        "marble foyer with crystal chandeliers"
    ]
    # Identify the tone based on the image description
    tone = ""
    if any(keyword in description.lower() for keyword in nature_keywords):
        tone = "Nature’s beauty is timeless and serene."
    elif any(keyword in description.lower() for keyword in city_keywords):
        tone = "The hustle and bustle of city life, captured in a moment."
    elif any(keyword in description.lower() for keyword in people_keywords):
        tone = "A beautiful moment with loved ones, filled with joy and laughter."
    else:
        tone = random.choice(aesthetic_tones)

    # Combine the description with the aesthetic tone
    aesthetic_caption = f"{description} \n\n{tone}"

    # Split the caption into smaller lines, aiming for 2 to 5 lines
    lines = aesthetic_caption.split("\n")
    num_lines = random.randint(2, 5)  # Randomly choose how many lines to use (between 2 and 5)
    # Slicing ensures that the number of lines does not exceed the available content
    final_caption = "\n".join(lines[:num_lines])

    # Optionally add hashtags at the end
    hashtags = "#photography #art #inspiration #nature #urban #portrait #adventure"
    # Combine the caption with hashtags
    final_caption_with_hashtags = f"{final_caption}\n\n{hashtags}"

    # Limit the caption to Instagram's maximum character length (2200 characters)
    if len(final_caption_with_hashtags) > 2200:
        final_caption_with_hashtags = final_caption_with_hashtags[:2200] + "..."
    return final_caption_with_hashtags

# Function to generate the base caption from the BLIP model
def generate_caption(image):
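    """Generate a base caption with BLIP, then pass it through add_aesthetic_flair."""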
    inputs = processor(images=image, return_tensors="pt")
    outputs = model.generate(**inputs)
    base_caption = processor.decode(outputs[0], skip_special_tokens=True)
    full_caption = add_aesthetic_flair(base_caption)
    return full_caption

def caption_image(image):
"""
Takes a PIL Image input and returns a customized aesthetic caption.
"""
try:
caption = generate_caption(image)
return caption
except Exception as e:
return f"An error occurred: {str(e)}"
# Gradio interface setup
iface = gr.Interface(
    fn=caption_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Aesthetic Image Captioning",
    description="Upload an image to generate a customized aesthetic caption for your Instagram post."
)
iface.launch()
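
# Note: iface.launch(share=True) would also work here if a temporary public link is needed.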