working on it ...

Filters

Explore Public Snippets

Sort by

Found 1 snippet

    public by antlong  489191  15  7  1

    Extract JPG images from a PDF

    A command-line tool to extract JPG images from PDF files.
    #!/usr/bin/env python
    import sys
    
    
    def main():
        """Extract embedded JPG images from a PDF file.

        Usage:
          python extract.py filename.pdf

        Note:
          All extracted images will be saved to the directory
          the script is initialized in, named jpg0.jpg, jpg1.jpg, ...

        Raises:
          Exception: if a stream that starts with a JPG marker is not
            followed by an "endstream" keyword or a JPG end marker.
        """
        try:
            # The context manager closes the handle even if read() fails.
            with open(sys.argv[1], "rb") as pdf_file:
                pdf = pdf_file.read()
        except (IndexError, IOError, OSError):
            # Missing command-line argument or unreadable input file.
            print("Usage: `python extract.py filename.pdf`")
            return

        # Byte markers that open and close a JPG image.
        startmark, endmark = b"\xff\xd8", b"\xff\xd9"
        i, njpg = 0, 0

        while True:
            istream = pdf.find(b"stream", i)
            if istream < 0:
                break
            # Only treat the stream as a JPG if the start marker appears
            # within 20 bytes of the "stream" keyword.
            istart = pdf.find(startmark, istream, istream + 20)
            if istart < 0:
                i = istream + 20
                continue
            iend = pdf.find(b"endstream", istart)
            if iend < 0:
                raise Exception("Couldn't find end of stream.")
            # Start looking for the end marker 20 bytes before "endstream";
            # the search has no upper bound.
            iend = pdf.find(endmark, iend - 20)
            if iend < 0:
                raise Exception("Couldn't find end of JPG.")

            # Include the end marker itself in the extracted slice.
            iend += len(endmark)
            with open("jpg%d.jpg" % njpg, "wb") as _f:
                _f.write(pdf[istart:iend])
            njpg += 1
            i = iend

        print("Extracted %s JPG files." % njpg)
    
    # Script entry point. The call is unconditional (no __main__ guard is
    # visible), so importing this file also runs the extraction.
    main()
    

    List Complete ( 1 snippet total )

    Cannot find what you were looking for?
    Ask the Snip2Code Community

    • Public Snippets
    • Channels Snippets