#
# Save data in CSV format.
import csv

# newline='' leaves line-ending handling to the csv module; 'w+' opens the
# file in text mode for reading and writing, truncating existing content.
# The with-block closes the file even if a write fails.
with open('test.csv', 'w+', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(('num', 'num+2', 'num*2'))  # header row
    for i in range(10):
        writer.writerow((i, i + 2, i * 2))
#
# MySQL access from Python.
import pymysql  # third-party MySQL driver

# Create the connection. The keyword is spelled `charset`; the original
# `chaset` is not a documented parameter of pymysql.connect().
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                       password=None, db='mysql', charset='utf8')
try:
    cur = conn.cursor()  # create a cursor
    try:
        cur.execute("USE scraping")  # select the database to work in
        cur.execute("SELECT * FROM pages WHERE id=1")  # run the query
        print(cur.fetchone())  # fetch a single row
    finally:
        cur.close()  # close the cursor even if a statement failed
finally:
    conn.close()  # always release the connection
#
# Converting text to bytes requires specifying the encoding.
# The original line assigned to `str` (shadowing the builtin used by later
# code) and passed `value=` / `encoding=None`, which bytes() rejects with a
# TypeError; the source must be text and the encoding a codec name.
empty_bytes = bytes("", encoding="utf-8")
# Read a CSV file from a URL and parse it in memory.
from urllib.request import urlopen
from io import StringIO  # in-memory text buffer
import csv

# Download the whole file; bytes that are not valid ASCII are dropped.
# The with-block closes the HTTP response once the body has been read.
with urlopen("http://pythonscraping.com/files/MontyPythonAlbums.csv") as response:
    data = response.read().decode('ascii', 'ignore')

# csv.reader needs a file-like object, so wrap the decoded text.
dataFile = StringIO(data)
csvReader = csv.reader(dataFile)

for row in csvReader:
    # csv.reader yields every field as text, so no conversion is needed.
    print("The album \"" + row[0] + "\" was released in " + row[1])
# pdfminer3k
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO
from io import open
from urllib.request import urlopen


def readPDF(pdfFile):
    """Return the text that pdfminer extracts from an open PDF file object.

    Args:
        pdfFile: A binary file-like object positioned at the start of a PDF.

    Returns:
        The text written by the converter, as a single string.
    """
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()  # the converter writes the extracted text here
    try:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdfFile)
        finally:
            device.close()  # release the converter even if parsing fails
        return retstr.getvalue()
    finally:
        retstr.close()


pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf")
try:
    outputString = readPDF(pdfFile)
    print(outputString)
finally:
    pdfFile.close()  # close the HTTP response even if extraction raised
# docx
from zipfile import ZipFile
from urllib.request import urlopen
from io import BytesIO
from bs4 import BeautifulSoup

# Download the document fully into memory; ZipFile needs a seekable file
# object, which the raw HTTP response is not.
with urlopen("http://pythonscraping.com/pages/AWordDocument.docx") as response:
    wordFile = response.read()
wordFile = BytesIO(wordFile)

# The .docx is opened as a zip archive and its main XML part is read out.
with ZipFile(wordFile) as document:
    xml_content = document.read('word/document.xml')

wordObj = BeautifulSoup(xml_content.decode('utf-8'), "lxml-xml")
textStrings = wordObj.findAll("w:t")

for textElem in textStrings:
    closeTag = ""
    try:
        style = textElem.parent.previousSibling.find("w:pStyle")
        if style is not None and style["w:val"] == "Title":
            # NOTE(review): the two literals below look like they lost their
            # content when this file was extracted (presumably an opening and
            # closing heading tag) - confirm against the original snippet.
            print("")
            closeTag = "\n"
    except (AttributeError, TypeError):
        # No tag to print. AttributeError: there is no previous sibling.
        # TypeError: the sibling is a text node, whose .find() is str.find()
        # and returns an int that cannot be subscripted.
        pass
    print(textElem.text)
    print(closeTag)