KUAN-HUNG LIN (Gary) M.S. Data Analytics in GMU: Web Scrapy Old Building

# Web Scrape using BeautifulSoup module

# http://group.lifego.tw/

# From the website, this project uses python BeautifulSoup module to scrapy old apartment communities

# and then save as an Excel file.

# importing module

from bs4 import BeautifulSoup

import requests

import pandas as pd

from openpyxl import load_workbook

# Put everything together inside a For-Loop

building_name = []

building_info = []

# Web Scraping BeautifulSoup Building Changhua

for i in range (1,4):

# website in variable

website1 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%bd%b0%e5%8c%96%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website1,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', sheet_name = 'building', index = False, header = False)

# Web Scraping BeautifulSoup Building Chiayi City

for i in range (1,4):

# website in variable

website2 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%98%89%e7%be%a9%e5%b8%82&a2=&q='

# request to website

response = requests.get(website2,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Chiayi Couty

# website in variable

website3 = 'http://group.lifego.tw/Litem.aspx?t=1041&a1=%e5%98%89%e7%be%a9%e7%b8%a3&a2='

# request to website

response = requests.get(website3,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Hsinchu

for i in range (1,7):

# website in variable

website4 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e6%96%b0%e7%ab%b9%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website4,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building HsinchuCity

for i in range (1,8):

# website in variable

website5 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e6%96%b0%e7%ab%b9%e5%b8%82&a2=&q='

# request to website

response = requests.get(website5,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Hualinen

for i in range (1,2):

# website in variable

website6 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e8%8a%b1%e8%93%ae%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website6,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Kaohsiung

for i in range (1,21):

# website in variable

website7 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e9%ab%98%e9%9b%84%e5%b8%82&a2=&q='

# request to website

response = requests.get(website7,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Keelung

for i in range (1,6):

# website in variable

website8 = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%9f%ba%e9%9a%86%e5%b8%82&a2=&q='

# request to website

response = requests.get(website8,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Kinmen

# website in variable

website9 = 'http://group.lifego.tw/Litem.aspx?t=1041&a1=%e9%87%91%e9%96%80%e7%b8%a3&a2='

# request to website

response = requests.get(website9,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Miaoli

for i in range (1,3):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e8%8b%97%e6%a0%97%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Nantou

for i in range (1,2):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%8d%97%e6%8a%95%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building NewTaipei

for i in range (1,21):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e6%96%b0%e5%8c%97%e5%b8%82&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Penghu

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&a1=%e6%be%8e%e6%b9%96%e7%b8%a3&a2='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Pingtung

for i in range (1,3):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%b1%8f%e6%9d%b1%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Taichung

for i in range (1,3):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%8f%b0%e4%b8%ad%e5%b8%82&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Tainan

for i in range (1,6):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%8f%b0%e5%8d%97%e5%b8%82&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Taipei

for i in range (1,21):

# website in variable

website = 'https://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%8f%b0%e5%8c%97%e5%b8%82&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Taitung

# website in variable

website = 'https://group.lifego.tw/Litem.aspx?t=1041&page=1&a1=%e5%8f%b0%e6%9d%b1%e7%b8%a3&a2='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Taoyuan

for i in range (1,21):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e6%a1%83%e5%9c%92%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Yilan

for i in range (1,4):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e5%ae%9c%e8%98%ad%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

# Web Scraping BeautifulSoup Building Yunlin

for i in range (1,2):

# website in variable

website = 'http://group.lifego.tw/Litem.aspx?t=1041&page=' + str(i) + '&a1=%e9%9b%b2%e6%9e%97%e7%b8%a3&a2=&q='

# request to website

response = requests.get(website,headers={

"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

})

# soup object

soup = BeautifulSoup(response.content, 'html.parser')

# results

results = soup.find_all('div',{'class':'product-info'})

# loop through results

for result in results:

# building name

building_name.append(result.find('font').get_text())

# building info

building_info.append(result.find('span').get_text())

# dictionary

building_detail = pd.DataFrame({'Building_Name': building_name, 'Building_Info': building_info})

# Output in Excel

building_detail.to_excel('舊大樓.xlsx', index = False, header = False)

KUAN-HUNG LIN (Gary) M.S. Data Analytics in GMU

2022年10月17日星期一

Web Scrapy Old Building

沒有留言:

張貼留言

Python program to display calendar

檢舉濫用情形

標籤

2022年10月17日 星期一

Web Scrapy Old Building

沒有留言:

張貼留言

Python program to display calendar

2022年10月17日星期一