-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScrapeLCSCimagesToOdoo.py
187 lines (140 loc) · 7.17 KB
/
ScrapeLCSCimagesToOdoo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
LCSC Image Scraper
Grabs images for products that have JLC part numbers: looks the part up on the LCSC website, downloads its image, and uploads it to the matching Odoo product.
These images are less impressive than DigiKey's, so they are best used just to fill in the blanks.
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import requests
import base64
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import xmlrpc.client
import pandas as pd
import traceback
# Odoo connection details.
# Each value can be overridden through the environment; the literals are the
# script's original defaults and are used whenever the variable is unset.
# SECURITY NOTE(review): a password should not live in source control --
# rotate this credential and supply it through ODOO_PASSWORD instead.
# NOTE(review): the default username looks like a redacted placeholder rather
# than a usable login -- confirm, or set ODOO_USERNAME.
url = os.environ.get("ODOO_URL", "https://dev2-v17.apps.bluerobotics.com/")
db = os.environ.get("ODOO_DB", "20241009_v2")
username = os.environ.get("ODOO_USERNAME", "[email protected]")
password = os.environ.get("ODOO_PASSWORD", "VqqpHGVZCh3yfj7")

print("Establishing connection to Odoo Server...")
# Drop any trailing slash so the endpoint paths do not begin with "//".
_rpc_base = url.rstrip("/")
common = xmlrpc.client.ServerProxy(f"{_rpc_base}/xmlrpc/2/common")
uid = common.authenticate(db, username, password, {})
if not uid:
    # A rejected login comes back as a falsy uid instead of an exception;
    # stop here rather than failing later on the first execute_kw call.
    raise RuntimeError(
        f"Odoo authentication failed for user {username!r} on database {db!r}"
    )
models = xmlrpc.client.ServerProxy(f"{_rpc_base}/xmlrpc/2/object")
print("Connected.")
def load_odoo_vendors_as_df():
    """Collect the supplier records of every BRE product in Odoo.

    Returns:
        A ``(supplierinfo_ids, odoo_vendors_df)`` tuple: the ids of the
        ``product.supplierinfo`` records linked to the BRE products, and a
        DataFrame with one row per record and the columns ``BRE Number``,
        ``Supplier`` and ``SPN``. The DataFrame always carries those columns,
        even when it has no rows, so callers can filter on them safely.
    """
    print("Gathering all BRE parts from Odoo...")
    default_code_prefix = "BRE-"

    # '=ilike' uses the pattern exactly as written, so "BRE-%" is a genuine
    # case-insensitive prefix match. Plain 'ilike' wraps the value in '%' on
    # both sides and would also match the text in the middle of a code.
    product_ids = models.execute_kw(
        db, uid, password, 'product.product', 'search',
        [[['default_code', '=ilike', f"{default_code_prefix}%"]]],
    )

    # Supplier records attached to any of those products.
    supplierinfo_ids = models.execute_kw(
        db, uid, password, 'product.supplierinfo', 'search',
        [[['product_id', 'in', product_ids]]],
    )

    # Map each product id to its BRE number for the rows built below.
    product_bres = models.execute_kw(
        db, uid, password, 'product.product', 'read',
        [product_ids], {'fields': ['default_code']},
    )
    product_bre_map = {product['id']: product['default_code'] for product in product_bres}

    odoo_vendors_list = []
    if supplierinfo_ids:
        suppliers = models.execute_kw(
            db, uid, password, 'product.supplierinfo', 'read',
            [supplierinfo_ids], {'fields': ['partner_id', 'product_code', 'product_id']},
        )
        # The relational fields are indexed as [0] for the id and [1] for the name.
        odoo_vendors_list = [
            {
                'BRE Number': product_bre_map[supplier['product_id'][0]],
                'Supplier': supplier['partner_id'][1],
                'SPN': supplier['product_code'],
            }
            for supplier in suppliers
        ]
    else:
        print("No supplier information found for the provided product template IDs.")

    # Declare the columns explicitly: a DataFrame built from an empty list
    # would otherwise have no columns and break column-based filtering.
    odoo_vendors_df = pd.DataFrame(odoo_vendors_list, columns=['BRE Number', 'Supplier', 'SPN'])
    return supplierinfo_ids, odoo_vendors_df
def get_lcsc_image_url(part_number):
    """Look a part up on LCSC and return the URL of its product image.

    Loads the LCSC search page for ``part_number`` in headless Chrome, waits
    for an element with class ``v-image__image`` to appear, then reads the
    image address out of the inline ``style`` attribute of the
    ``v-image__image v-image__image--contain`` div.

    Args:
        part_number: Part number to search for on LCSC.

    Returns:
        The image URL as a string, or ``None`` when no part number was given
        or no image could be located on the page.
    """
    # An empty / None / False part number cannot be searched for, so skip
    # launching a browser altogether.
    if not part_number:
        print("Image URL not found.")
        return None

    from urllib.parse import quote_plus
    from selenium.common.exceptions import TimeoutException

    # Set up Chrome options for headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")  # Optional: for compatibility
    options.add_argument("--disable-dev-shm-usage")  # Optional: for compatibility

    # Pass the options by keyword so they cannot be mistaken for another
    # positional parameter of the Chrome constructor.
    driver = webdriver.Chrome(options=options)
    try:
        # URL-encode the part number so special characters cannot break the query.
        search_url = f"https://www.lcsc.com/search?q={quote_plus(str(part_number))}"
        driver.get(search_url)
        print("Current URL:", driver.current_url)

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "v-image__image"))
            )
            time.sleep(3)  # Additional wait to ensure full load
        except TimeoutException:
            # Best effort: still parse whatever the page contains.
            print("Image container not loaded.")

        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed,
        # so no Chrome process is left behind.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # The image is a CSS background, so its URL lives in the style attribute.
    image_url = None
    image_div = soup.find('div', class_='v-image__image v-image__image--contain')
    if image_div:
        style = image_div.get('style', '')
        url_match = re.search(r'url\("(.+?)"\)', style)
        if url_match:
            image_url = url_match.group(1)

    if image_url:
        print("Image URL:", image_url)
        return image_url

    print("Image URL not found.")
    return None
def add_image_to_odoo_part(product_id, product, image_url):
    """Download an image and store it as the ``image_1920`` of an Odoo product.

    Failures (network error, non-200 response, Odoo write error) are printed
    rather than raised, so a single bad part does not stop a batch run.

    Args:
        product_id: Id of the ``product.product`` record to update.
        product: Result of reading that record; ``product[0]['default_code']``
            is used in the success message.
        image_url: Address of the image to download.
    """
    # Add headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
    }

    try:
        # Without a timeout a stalled server would hang the script forever.
        response = requests.get(image_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to get image: {exc}")
        return

    if response.status_code != 200:
        print("Failed to get image. Status code:", response.status_code)
        return

    # The image is sent to Odoo as base64 text.
    image_base64 = base64.b64encode(response.content).decode('utf-8')

    try:
        # Write the image onto the existing product record.
        models.execute_kw(db, uid, password, 'product.product', 'write', [[product_id], {'image_1920': image_base64}])
        print(f"Success: New image added to {product[0]['default_code']}")
    except Exception:
        # Capture the full traceback to show the detailed error
        error_message = traceback.format_exc()
        print(f"An error occurred: {error_message}")
# Main script: for every BRE product without an image, take its first JLCPCB
# supplier part number, scrape the matching image from LCSC, and upload it.
dir_path = os.path.dirname(os.path.realpath(__file__))
# NOTE(review): scraper_path and image_path are not used by the code visible
# in this file; kept in case something else relies on them.
scraper_path = os.path.join(dir_path, "lcsc-scraper", "main.py")
image_path = os.path.join(dir_path, "Images")

supplierinfo_ids, odoo_vendors_df = load_odoo_vendors_as_df()
if odoo_vendors_df.empty:
    # An empty frame may come back without any columns; give it the expected
    # ones so the per-product filter below cannot raise KeyError.
    odoo_vendors_df = pd.DataFrame(columns=["BRE Number", "Supplier", "SPN"])

# Specify BRE numbers
default_code_prefix = "BRE-"
# '=ilike' uses the pattern exactly as written, so "BRE-%" matches codes that
# start with the prefix; plain 'ilike' would match it anywhere in the code.
product_ids = models.execute_kw(
    db, uid, password, 'product.product', 'search',
    [[['default_code', '=ilike', f"{default_code_prefix}%"]]],
)

for product_id in product_ids:
    # Read the current image together with the BRE number.
    product = models.execute_kw(
        db, uid, password, 'product.product', 'read',
        [product_id], {'fields': ['image_1920', 'default_code']},
    )
    default_code = product[0]['default_code']

    # Products that already have an image are left alone.
    if product[0]['image_1920']:
        print(f"{default_code} already has an image.")
        continue
    print("The product does not have an image, checking for a JLC number.")

    # Supplier rows for this BRE number whose supplier is JLCPCB.
    product_jlc = odoo_vendors_df[
        (odoo_vendors_df["BRE Number"] == default_code)
        & (odoo_vendors_df["Supplier"] == "JLCPCB")
    ]
    if product_jlc.empty:
        print(f"{default_code} does not have a JLCPCB number")
        continue
    print(f"{default_code} has a JLCPCB number")

    # Use the first JLCPCB supplier part number to find the image on LCSC.
    jlc_part_number = product_jlc.iloc[0]["SPN"]
    image_url = get_lcsc_image_url(jlc_part_number)
    if image_url is None:
        print("LCSC has no image for this part")
        continue

    add_image_to_odoo_part(product_id, product, image_url)