[Image-SIG] How to read IPTC info?

Fredrik Lundh fredrik@pythonware.com
Sun, 18 Nov 2001 17:35:24 +0100


"Ausum" wrote:
> Does anyone know what's the right code to read IPTC metadata
> from photoshop generated jpeg images? Basically I need to know
> how to read at least the caption info.

the following sample contains a helper function that reads
IPTC/NAA metadata from JPEG and TIFF images, using PIL's
built-in IPTC, JPEG, and TIFF parsers.

the resulting dictionary maps IPTC/NAA field codes (given as
2-tuples) to plain strings.

info[(2, 120)] is the caption.

for the rest, see the IPTC/NAA docs (if you have them), or
run the script on a sample file.

</F>

#
# read IPTC info fields from Photoshop TIFF and JPEG files
#
# by Fredrik Lundh, November 2001
#

import Image, TiffImagePlugin, JpegImagePlugin, IptcImagePlugin
import StringIO

class FakeImage:
    """Empty placeholder class.

    getiptcinfo() creates an instance of this class and then rebinds the
    instance's __class__ to IptcImagePlugin.IptcImageFile, which yields a
    parser object without running that class's normal initialization.
    """

def _find_iptc_resource(app):
    # scan the payload of a JPEG APP13 segment for the IPTC/NAA image
    # resource (resource code 0x0404).
    #
    # returns the raw resource data as a string, or None if the payload
    # does not start with the "Photoshop 3.0" signature or contains no
    # 0x0404 resource.  a payload that ends in the middle of a resource
    # header raises IndexError (e.g. from the ord() call below); the
    # caller is expected to handle that.

    if app[:14] != "Photoshop 3.0\x00":
        return None
    app = app[14:]

    # walk the chain of "8BIM" image resource blocks
    offset = 0
    while app[offset:offset+4] == "8BIM":
        offset = offset + 4
        # resource code
        code = JpegImagePlugin.i16(app, offset)
        offset = offset + 2
        # resource name (usually empty): one length byte followed by the
        # name itself; skip it, then round the offset up to an even value
        offset = offset + 1 + ord(app[offset])
        if offset & 1:
            offset = offset + 1
        # resource data block
        size = JpegImagePlugin.i32(app, offset)
        offset = offset + 4
        if code == 0x0404:
            # 0x0404 contains IPTC/NAA data
            return app[offset:offset+size]
        # not the one we want; skip the data (also rounded up to even)
        offset = offset + size
        if offset & 1:
            offset = offset + 1

    return None

def getiptcinfo(im):
    # extract IPTC data from a Photoshop JPEG or TIFF file
    #
    # im is an image object as returned by Image.open().  only
    # JpegImagePlugin.JpegImageFile and TiffImagePlugin.TiffImageFile
    # instances are examined; any other image type yields None.
    #
    # returns the info dictionary filled in by PIL's IPTC parser, or
    # None if no IPTC/NAA data could be located in the file.

    data = None

    if isinstance(im, JpegImagePlugin.JpegImageFile):
        # extract the IPTC/NAA resource from the APP13 marker.  a missing
        # marker (AttributeError, KeyError) or a truncated resource block
        # (IndexError) is treated the same as "no IPTC data".
        try:
            data = _find_iptc_resource(im.app["APP13"])
        except (AttributeError, KeyError, IndexError):
            pass

    elif isinstance(im, TiffImagePlugin.TiffImageFile):
        # get raw data from the IPTC/NAA tag (Photoshop tags the data
        # as 4-byte integers, so we cannot use the get method...)
        try:
            # the stored tag type is not needed, only the raw data
            _tagtype, data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
        except (AttributeError, KeyError):
            pass

    if data is None:
        return None # no properties

    # create an IptcImagePlugin object without initializing it
    iptc = FakeImage()
    iptc.__class__ = IptcImagePlugin.IptcImageFile

    # parse the IPTC information chunk from an in-memory file
    iptc.info = {}
    iptc.fp = StringIO.StringIO(data)

    try:
        iptc._open()
    except (IndexError, KeyError):
        # expected failure (per the original author); whatever ended up
        # in the info dictionary is still returned below
        pass

    return iptc.info

if __name__ == "__main__":

    # command-line driver: print the IPTC fields of every image file
    # named on the command line

    import sys

    for filename in sys.argv[1:]:
        # each print below takes a single pre-formatted string, so the
        # output is the same whether print is a statement or a function
        print("%s ..." % filename)
        # open the file currently being reported on (not always the
        # first command-line argument)
        im = Image.open(filename)
        info = getiptcinfo(im)
        if info:
            # extract caption
            print("  CAPTION %s" % (info.get((2, 120)),))
            # print all available fields
            for k, v in info.items():
                print("  %s %s" % (k, repr(v)))
        else:
            # getiptcinfo returned None or an empty dictionary
            print("  no info")