“Ekstrak gambar dari PDF” Kode Jawaban

Ekstrak gambar dari PDF Python

import PyPDF2

from PIL import Image

if __name__ == '__main__':
    input1 = PyPDF2.PdfFileReader(open("input.pdf", "rb"))
    page0 = input1.getPage(0)
    xObject = page0['/Resources']['/XObject'].getObject()

    for obj in xObject:
        if xObject[obj]['/Subtype'] == '/Image':
            size = (xObject[obj]['/Width'], xObject[obj]['/Height'])
            data = xObject[obj].getData()
            if xObject[obj]['/ColorSpace'] == '/DeviceRGB':
                mode = "RGB"
            else:
                mode = "P"

            if xObject[obj]['/Filter'] == '/FlateDecode':
                img = Image.frombytes(mode, size, data)
                img.save(obj[1:] + ".png")
            elif xObject[obj]['/Filter'] == '/DCTDecode':
                img = open(obj[1:] + ".jpg", "wb")
                img.write(data)
                img.close()
            elif xObject[obj]['/Filter'] == '/JPXDecode':
                img = open(obj[1:] + ".jp2", "wb")
                img.write(data)
                img.close()
Repulsive Rattlesnake

# Ekstrak gambar dari file pdf

# extract images from pdf file
import fitz
doc = fitz.open("file.pdf")
for i in range(len(doc)):
    for img in doc.getPageImageList(i):
        xref = img[0]
        pix = fitz.Pixmap(doc, xref)
        if pix.n < 5:       # this is GRAY or RGB
            pix.writePNG("p%s-%s.png" % (i, xref))
        else:               # CMYK: convert to RGB first
            pix1 = fitz.Pixmap(fitz.csRGB, pix)
            pix1.writePNG("p%s-%s.png" % (i, xref))
            pix1 = None
        pix = None
Impossible Impala

Ekstrak gambar dari PDF

# STEP 1
# import libraries
import fitz
import io
from PIL import Image
  
# STEP 2
# file path you want to extract images from
file = "/content/pdf_file.pdf"
  
# open the file
pdf_file = fitz.open(file)
  
# STEP 3
# iterate over PDF pages
for page_index in range(len(pdf_file)):
    
    # get the page itself
    page = pdf_file[page_index]
    image_list = page.getImageList()
      
    # printing number of images found in this page
    if image_list:
        print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
    else:
        print("[!] No images found on page", page_index)
    for image_index, img in enumerate(page.getImageList(), start=1):
        
        # get the XREF of the image
        xref = img[0]
          
        # extract the image bytes
        base_image = pdf_file.extractImage(xref)
        image_bytes = base_image["image"]
          
        # get the image extension
        image_ext = base_image["ext"]
joel hao

Jawaban yang mirip dengan “Ekstrak gambar dari PDF”

Pertanyaan yang mirip dengan “Ekstrak gambar dari PDF”

Lebih banyak jawaban terkait untuk “Ekstrak gambar dari PDF” di Python

Jelajahi jawaban kode populer menurut bahasa

Jelajahi bahasa kode lainnya