2022年10月12日 星期三

Optical Character Recognition Using the Python pytesseract Module

 #we first import our libraries here

# Optical Character Recognition using python pytesseract module

from PIL import Image

import pytesseract


# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'


# Path of the image to run OCR on.
img_name = 'D:/gary/no2216_P10.jpg'


# Open the image inside a context manager so the underlying file handle is
# released as soon as OCR has finished, then extract the text from the image
# using the 'chi_tra' language data.
with Image.open(img_name) as img:
    text = pytesseract.image_to_string(img, lang='chi_tra')


# Display the recognized text.
print(text)

Web Scraping Tech Companies' Stock Data

 from bs4 import BeautifulSoup

import requests

import pandas as pd


# Scrape the stock-list table from the page below and export it to Excel.

# Module-level lists kept from the original script; nothing below fills them.
company_name = []

company_info = []


# Listing page to scrape (the query parameters are URL-encoded).
website = 'https://goodinfo.tw/tw/StockList.asp?MARKET_CAT=%E5%85%A8%E9%83%A8&INDUSTRY_CAT=%E8%B3%87%E8%A8%8A%E6%9C%8D%E5%8B%99%E6%A5%AD&SHEET=%E4%BA%A4%E6%98%93%E7%8B%80%E6%B3%81&SHEET2=%E6%97%A5&RPT_TIME=%E6%9C%80%E6%96%B0%E8%B3%87%E6%96%99'


# Request the page with a browser-like User-Agent header.
response = requests.get(website, headers={
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
})

# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()


# Parse the returned HTML.
soup = BeautifulSoup(response.content, 'html.parser')


# Locate the data table by its class attribute.
results = soup.find('table',{'class':'r10 b1 p4_1'})

if results is None:
    # Without this check the next line would fail with an opaque
    # AttributeError on None.
    raise RuntimeError("Stock table with class 'r10 b1 p4_1' was not found in the response")


# Column names come from the <th> cells of the first table row.
columns = [th.text.replace('\n', '') for th in results.find('tr').find_all('th')]


# Every row after the first is treated as data.
trs = results.find_all('tr')[1:]

rows = []

for tr in trs:
    cells = [td.text.replace('\n', '').replace("\xa0", '') for td in tr.find_all('td')]

    # A row without any <td> cells carries no data, so it is skipped.
    if cells:
        rows.append(cells)

# NOTE: the original script ran a second loop here that appended the text of
# each row's first <td> to `rows` as a bare string. That mixed strings into a
# list of per-row lists (corrupting the DataFrame input) and raised
# AttributeError for rows with no <td>, so it has been removed.


# Build the table of company details.
company_detail = pd.DataFrame(data=rows, columns=columns)


# Write the result to an Excel file without the DataFrame index column.
company_detail.to_excel('TechCompanies.xlsx', index=False)

2022年10月6日 星期四

Rename Workbook Name Using Python openpyxl module

 #   Rename Workbook Name Using Python openpyxl module


import openpyxl

import os


# Work from the folder that contains the workbook.
os.chdir('C:/Users/gary')

ss = openpyxl.load_workbook("file1.xlsx")


# Look the sheet up by its title using workbook indexing;
# Workbook.get_sheet_by_name() is deprecated in openpyxl.
ss_sheet = ss['Workbook00']

# Rename the sheet, then write the workbook back to the same file.
ss_sheet.title = 'Workbook01'

ss.save('file1.xlsx')

Copy Excel Sheet to another Using Python openpyxl module

# Copy Excel Sheet to another Using Python openpyxl module

import openpyxl as xl


# Source workbook (read) and destination workbook (modified in place).
path1 = 'C:/Users/gary/file1.xlsx'

# The original value 'D:/C:/Users/gary/file2.xlsx' contained two drive
# prefixes, which is not a valid Windows path; this now matches the
# destination path used by the other examples.
# NOTE(review): confirm the intended drive for file2.xlsx.
path2 = 'C:/Users/gary/file2.xlsx'


# Take the first worksheet of the source workbook.
wb1 = xl.load_workbook(filename=path1)

ws1 = wb1.worksheets[0]


# Add a new sheet to the destination workbook, titled like the source sheet.
# (The original referenced an undefined name `wb3` here, raising NameError.)
wb2 = xl.load_workbook(filename=path2)

ws2 = wb2.create_sheet(ws1.title)

# Copy every cell value to the same coordinate in the new sheet.
for row in ws1:

    for cell in row:

        ws2[cell.coordinate].value = cell.value


wb2.save(path2)


Convert xls files to xlsx Using Python Pandas Module and then Copy and Paste the source Excel file to destination Excel file using Python openpyxl module

 # Convert xls files to xlsx Using Python Pandas Module

 # and then Copy and Paste the source Excel file to destination Excel file using Python openpyxl module


#  Converting xls files to xlsx

import pandas as pd

import os


# Step 1: convert the legacy .xls workbook to .xlsx via a DataFrame.
# NOTE(review): to_excel() writes the DataFrame index as an extra first
# column by default -- confirm that is wanted in the converted file.
df = pd.read_excel(io='file1.xls')
df.to_excel(excel_writer='file1.xlsx')


# Step 2: copy the converted sheet into the destination workbook.
import openpyxl as xl


# Source workbook: use its first worksheet.
filename = "C:/Users/gary/file1.xlsx"
wb1 = xl.load_workbook(filename)
ws1 = wb1.worksheets[0]


# Destination workbook: use whichever sheet is active.
filename1 = "C:/Users/gary/file2.xlsx"
wb2 = xl.load_workbook(filename1)
ws2 = wb2.active


# Size of the source sheet, printed one value per line (rows, then columns).
mr = ws1.max_row
mc = ws1.max_column

print(mr, mc, sep='\n')


# Copy cell by cell, writing each source value to the same row/column
# position in the destination sheet.
for row_idx in range(1, mr + 1):
    for col_idx in range(1, mc + 1):
        source_cell = ws1.cell(row=row_idx, column=col_idx)
        ws2.cell(row=row_idx, column=col_idx).value = source_cell.value


# Persist the destination workbook (filename1 is already a str).
wb2.save(filename1)

Copy and Paste the source Excel file to destination Excel file using Python openpyxl module

# Copy and Paste the source Excel file to destination Excel file using Python openpyxl module

# importing openpyxl module

import openpyxl as xl


# opening the source excel file

# Source workbook and its first worksheet.
filename = "C:/Users/gary/file1.xlsx"
wb1 = xl.load_workbook(filename)
ws1 = wb1.worksheets[0]


# Destination workbook and its active worksheet.
filename1 = "C:/Users/gary/file2.xlsx"
wb2 = xl.load_workbook(filename1)
ws2 = wb2.active


# Report the dimensions of the source sheet: row count, then column count.
mr = ws1.max_row
mc = ws1.max_column

print(mr, mc, sep='\n')


# Visit every (row, column) position of the source sheet in row-major order
# and mirror its value into the destination sheet.
cell_positions = (
    (r, c)
    for r in range(1, mr + 1)
    for c in range(1, mc + 1)
)

for r, c in cell_positions:
    ws2.cell(row=r, column=c).value = ws1.cell(row=r, column=c).value


# Save the changes back to the destination file.
wb2.save(filename1)

Convert xls files to xlsx Using Python Pandas Module

 # Convert xls files to xlsx using python pandas module

 # importing pandas as module

import pandas as pd

import os


# Load the legacy workbook into a DataFrame, then write it back out in the
# .xlsx format (both paths are relative to the current working directory).
# NOTE(review): to_excel() also writes the DataFrame index as the first
# column by default -- confirm that is wanted.
df = pd.read_excel(io='file1.xls')
df.to_excel(excel_writer='file1.xlsx')

Python program to display calendar

# Python program to display calendar of given month of the year # importing calendar module for calendar operations import calendar # set t...