You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import cv2
from openai import OpenAI
import os

# Client for the OpenAI-compatible endpoint; replace <ip> with the server's
# host before running.
# The OpenAI client raises at construction time when no API key is available,
# so fall back to a placeholder when OPENAI_API_KEY is not set.
client = OpenAI(
    base_url='http://<ip>:80/v1',
    api_key=os.environ.get('OPENAI_API_KEY', 'EMPTY'),
)
# Model identifier sent with every chat-completion request.
model = "OpenGVLab/InternVL2-26B"
# Alternative configuration, kept for reference:
#client = OpenAI(base_url='http://<ip>:80/v1')
#model = 'OpenGVLab/InternVL-Chat-V1-5'
# Text part of the user message: an instruction preamble wrapped in
# <response_instructions> tags, followed by the actual question.
# The trailing empty entry reproduces the final newline of the prompt.
prompt = "\n".join([
    "<response_instructions>",
    "- Act as a keen observer with a sharp eye for detail.",
    "- Analyze the content within the images.",
    "- Provide insights based on your observations.",
    "- Avoid making up facts.",
    "- Finally, according to our chat history, above documents, above figure captions, or given images, generate a well-structured response.",
    "</response_instructions>",
    "What tower do you see in the image?",
    "",
])
from PIL import Image
import base64
import requests
from io import BytesIO
# The encoding function linked previously - note that the API server itself
# does not use this function.
def encode_image_base64(image: Image.Image, format: str = 'JPEG') -> str:
    """Encode a PIL image as a base64 string.

    Args:
        image: Image to serialize.
        format: Image format name passed to ``image.save``. It is compared
            case-insensitively when deciding whether to convert to RGB.

    Returns:
        The serialized image bytes, base64-encoded and decoded as UTF-8 text.
    """
    buffered = BytesIO()
    # Convert to RGB before JPEG encoding (modes such as RGBA cannot be
    # written as JPEG). Pillow accepts format names in any case, so 'jpeg'
    # must take the same path as 'JPEG'.
    if format.upper() == 'JPEG':
        image = image.convert('RGB')
    image.save(buffered, format)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
# Counterpart used by the API server: turns a base64 string back into an image.
def load_image_from_base64(image: str):
    """Decode a base64 string and open the decoded bytes as a PIL image."""
    raw_bytes = base64.b64decode(image)
    stream = BytesIO(raw_bytes)
    return Image.open(stream)
# Paths of the three JPEG files that are attached to the chat request.
image1, image2, image3 = (
    '/tmp/image_file_764ae7bd-6b02-4ffb-b9d6-83e754c30952.jpeg',
    '/tmp/image_file_1bfb88ea-a545-4b1f-a31f-051dbb90a378.jpeg',
    '/tmp/image_file_ac5589e7-92a3-470f-a933-40d6bad38052.jpeg',
)
#from PIL import Image
def remove_padding(image_path, output_path, background_color=(255, 255, 255)):
    """Crop bright padding from an image and save the result.

    The image is converted to grayscale and thresholded at 240; pixels at or
    below that level count as content. Among the external contours of the
    content, the one whose bounding box has the largest area is selected and
    the image is cropped to that box. If no content is found, the image is
    written unchanged.

    Args:
        image_path: Path of the image to read.
        output_path: Path the cropped image is written to.
        background_color: Currently unused; the background is detected with
            the fixed grayscale threshold. Kept so existing callers still work.

    Raises:
        OSError: If the image cannot be read or the result cannot be written.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Pixels brighter than 240 become 255 (background), the rest 0.
    _, binary = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
    # Invert so the content is the non-zero foreground that gets traced.
    inverted_binary = cv2.bitwise_not(binary)
    contours, _ = cv2.findContours(inverted_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        # Bounding box with the largest area; max() keeps the first box on
        # ties, matching a strict "greater than" scan.
        x, y, w, h = max(
            (cv2.boundingRect(contour) for contour in contours),
            key=lambda rect: rect[2] * rect[3],
        )
        cropped_image = image[y:y + h, x:x + w]
    else:
        # The whole image is background: there is no content box to crop to.
        cropped_image = image

    # cv2.imwrite returns False when the file could not be written.
    if not cv2.imwrite(output_path, cropped_image):
        raise OSError(f"Could not write image: {output_path}")
# Example usage
# Set to True to crop the padding from each image first and send the cropped
# copies (written next to the originals with the 'b.jpg' suffix) instead.
CROP_PADDING = False
if CROP_PADDING:
    ext = 'b.jpg'
    for _path in (image1, image2, image3):
        remove_padding(_path, _path + ext)
else:
    ext = ''


def _read_base64(path):
    """Return the contents of the file at *path* as base64 text."""
    # The context manager closes the file handle as soon as it has been read.
    with open(path, 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


image1_64 = _read_base64(image1 + ext)
image2_64 = _read_base64(image2 + ext)
image3_64 = _read_base64(image3 + ext)

# Only referenced by the commented-out system message below.
system_prompt = "You are h2oGPTe, an expert question-answering AI system created by H2O.ai that performs like GPT-4 by OpenAI."

# One image_url part per image, each carrying the image inline as a data URL.
image_parts = [
    {
        'type': 'image_url',
        'image_url': {
            'url': 'data:image/jpeg;base64,' + encoded,
        },
    }
    for encoded in (image1_64, image2_64, image3_64)
]

# A single user message: the three images followed by the text prompt.
messages = [
    #{'role': 'system', 'content': system_prompt},
    {
        'role': 'user',
        'content': image_parts + [{'type': 'text', 'text': prompt}],
    }
]

response = client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=300,
    temperature=0.0,
)
print(response.choices[0])
Running the script above gives:
The image does not show a tower. Instead, it shows two separate items:\n\n1. A receipt from a shopping store.\n2. A cake with a message congratulating Kate and Duke on their upcoming arrival.\n\nIf you have any specific questions about these items, please let me know!
The text was updated successfully, but these errors were encountered:
gives:
The text was updated successfully, but these errors were encountered: