I learned
내일배움캠프 AI - TIL 65
이모냥냥
2022. 12. 5. 01:16
반응형
❤️🔥TIL : Today I Learned❤️🔥
그날그날 내가 공부한 것을 정리하는 것
내일배움캠프 AI 트랙 65일차
오늘 배운 내용 - 크롤링으로 화장품(향수) 데이터 가져오기
import requests
from bs4 import BeautifulSoup
import json
# Scrape perfume pages from basenotes.com and dump them to a JSON file as a
# list of {"model": "perfume.perfume", "fields": {...}} records.
# NOTE(review): the record layout looks like a Django fixture -- confirm.
file_path = "./static/json/perfume.json"
# Id bounds intended for a wider crawl (an earlier start value was 26120000).
# They are not used by the loop below, which fetches one hard-coded id.
end = 26192510
start = end - 1000
perfume = []


def _strip_layout(text):
    """Return *text* with every tab and newline character removed."""
    return text.replace("\t", "").replace("\n", "")


def _parse_price(tag):
    """Return the price found in *tag* as a float.

    Falls back to 0 when the tag is missing or its text is not a number, so a
    single oddly formatted price cannot abort the whole crawl.
    """
    if tag is None:
        return 0
    raw = _strip_layout(tag.text.replace("USD", "")).replace(",", "").strip()
    try:
        return float(raw)
    except ValueError:
        return 0


def _parse_launch_date(tag):
    """Return "<token>-01-01" built from the last word of *tag*, else None.

    The last space-separated word is taken and its first and last characters
    are dropped (presumably a parenthesised year such as "(2019)" -- TODO
    confirm against the live page). A missing tag or an empty result gives
    None.
    """
    if tag is None:
        return None
    year = tag.text.split(" ")[-1][1:-1]
    return year + "-01-01" if year else None


def _collect_notes(soup):
    """Group the fragrance notes on the page by position.

    Returns a dict with the keys 'Top', 'Heart', 'Base' and 'None'; notes
    without a heading, or with a heading that is not one of the known
    positions, are stored under 'None'.
    """
    notes_data = {'Top': [], 'Heart': [], 'Base': [], 'None': []}
    for group in soup.select("ol.fragrancenotes>li"):
        heading = group.select_one("h3")
        # Position of the note: first word of the heading (Top / Heart / Base).
        note_type = _strip_layout(heading.text).split(" ")[0] if heading else "None"
        bucket = notes_data.get(note_type, notes_data["None"])
        for note in group.select("li"):
            link = note.select_one("a")
            bucket.append(link.text if link else _strip_layout(note.text))
    return notes_data


for num in range(26192367, 26192367 + 1):
    try:
        # A timeout keeps one stalled connection from hanging the crawl.
        req = requests.get(f'https://basenotes.com/fragrances/{num}', timeout=10)
    except requests.RequestException as exc:
        # Skip this id rather than lose the records collected so far.
        print(f"skip {num}: {exc}")
        continue

    soup = BeautifulSoup(req.text, 'html.parser')
    body = soup.select_one(".p-body-main")
    name_tag = body.select_one("h1>span[itemprop='Name']") if body else None
    if name_tag is None:
        # No perfume name on the page: nothing to record for this id.
        continue

    # Scrape the perfume details.
    brand = body.select_one("span[itemprop='brand']>a")
    gender = body.select_one("h1>span:nth-child(2)>i")
    launch_date = body.select_one("h1>span:nth-child(3)>span")
    thumbnail = body.select_one(".bnheroimageouter>img")
    price_tag = body.select_one(".bnminicontainer .bncard.card4 .ebayimage>div")

    # Primary target gender: (F)emale / (M)ale / uni(S)ex, taken from the last
    # character of the icon's first CSS class; defaults to "S".
    gender_classes = gender.get("class") if gender else None
    perfume_gender = gender_classes[0][-1:] if gender_classes else "S"

    thumbnail_src = thumbnail.get("src") if thumbnail else None
    perfume_thumbnail = "https://basenotes.com" + thumbnail_src if thumbnail_src else None

    # Scrape the fragrance notes.
    notes_data = _collect_notes(soup)

    result = {
        "origin_id": num,
        "image": perfume_thumbnail,
        "title": name_tag.text,
        "brand": brand.text if brand else None,
        "gender": perfume_gender,
        "price": _parse_price(price_tag),
        "launch_date": _parse_launch_date(launch_date),
        "top_notes": notes_data['Top'],
        "heart_notes": notes_data['Heart'],
        "base_notes": notes_data['Base'],
        "none_notes": notes_data['None'],
    }
    new_data = {"model": "perfume.perfume", "fields": result}
    print(new_data)
    perfume.append(new_data)

with open(file_path, 'w', encoding="utf-8") as outfile:
    json.dump(perfume, outfile, ensure_ascii=False, indent=4)
반응형