import {AppSegmentParserBase} from '../parser.mjs'
import {segmentParsers} from '../plugins.mjs'
import {undefinedIfEmpty} from '../util/helpers.mjs'
import {BufferView} from '../util/BufferView.mjs'

const XMP_CORE_HEADER = 'http://ns.adobe.com/'
const XMP_MAIN_HEADER = 'http://ns.adobe.com/xap/1.0/'
const XMP_EXTENDED_HEADER = 'http://ns.adobe.com/xmp/extension/'
// 2 bytes for markers + 2 bytes length
const TIFF_HEADER_LENGTH = 4
// Each XMP Extended segment starts with guid, length and offset (of its own, not just the one in TIFF header)
const XMP_EXTENDED_DATA_OFFSET = 79

/**
 * Segment parser for XMP data. Turns the XMP/XML string into plain objects
 * grouped by namespace prefix (or passes the raw string through when parsing
 * is disabled in the local options).
 */
export default class XmpParser extends AppSegmentParserBase {

	static type = 'xmp'
	static multiSegment = true

	/**
	 * Tells whether the segment starting at `offset` is an XMP segment.
	 * The uint32 comparison is a cheap pre-check for 'http' before the
	 * header string is actually read and compared.
	 */
	static canHandle(chunk, offset) {
		return chunk.getUint8(offset + 1) === 0xE1
			&& chunk.getUint32(offset + 4) === 0x68747470 // 'http'
			&& chunk.getString(offset + 4, XMP_CORE_HEADER.length) === XMP_CORE_HEADER
	}

	/**
	 * Returns the number of bytes preceding the XMP payload. Extended XMP
	 * segments use a fixed, longer header than the main XMP segment.
	 */
	static headerLength(chunk, offset) {
		let headerString = chunk.getString(offset + 4, XMP_EXTENDED_HEADER.length)
		if (headerString === XMP_EXTENDED_HEADER)
			return XMP_EXTENDED_DATA_OFFSET
		// 1 for null termination between header and data
		return TIFF_HEADER_LENGTH + XMP_MAIN_HEADER.length + 1
	}

	/**
	 * Extends the base segment description with multi-segment bookkeeping
	 * (`multiSegment`, `extended`, `chunkCount`, `chunkNumber`).
	 */
	static findPosition(chunk, offset) {
		let seg = super.findPosition(chunk, offset)
		// first is the main XMP, then the extended starts counting from 0.
		// We could determine that the XMP has extension if we looked for 'HasExtendedXMP'
		// but we don't want to read the segment here just yet.
		seg.multiSegment = seg.extended = seg.headerLength === XMP_EXTENDED_DATA_OFFSET
		if (seg.multiSegment) {
			seg.chunkCount = chunk.getUint8(offset + 72)
			seg.chunkNumber = chunk.getUint8(offset + 76)
			// first and second chunk both have 0 as the chunk number.
			// NOTE(review): the text from this point up to the `=>` inside handleMultiSegments()
			// was lost from the copy of the file this was restored from (everything between a '<'
			// in the original comment and the arrow's '>' was swallowed). The rest of this method
			// and the handleMultiSegments() signature below are a reconstruction - verify them
			// against version control before merging.
			if (chunk.getUint8(offset + 77) !== 0) seg.chunkNumber++
		} else {
			// presumably makes the main chunk sort ahead of the extended ones - TODO confirm
			seg.chunkCount = Infinity
			seg.chunkNumber = -1
		}
		return seg
	}

	// Concatenates the string content of all the segments into a single XMP string.
	// NOTE(review): method name and parameter are reconstructed, see the note in findPosition().
	static handleMultiSegments(allSegments) {
		return allSegments.map(seg => seg.chunk.getString()).join('')
	}

	// WARNING: XMP as IFD0 tag in TIFF can be either Uint8Array or string.
	// We need to be ready to accept any input data and turn it into string.
	normalizeInput(input) {
		return typeof input === 'string'
			? input
			: BufferView.from(input).getString()
	}

	/**
	 * Parses the XMP string into `{[namespacePrefix]: {[name]: value}}`.
	 * Returns the string untouched when `localOptions.parse` is falsy, and
	 * undefined when nothing useful was found.
	 *
	 * The content may or may not be wrapped into outer wrapper elements, so only
	 * rdf:Description elements are looked for. If there are none, the whole string
	 * is treated as the content of a single rdf:Description.
	 */
	parse(xmpString = this.chunk) {
		if (!this.localOptions.parse) return xmpString
		let source = idNestedTags(xmpString)
		let tags = XmlTag.findAll(source, 'rdf', 'Description')
		if (tags.length === 0)
			tags.push(new XmlTag('rdf', 'Description', undefined, source))
		let xmp = {}
		for (let tag of tags) {
			for (let prop of tag.properties) {
				assignToObject(prop, getNamespace(prop.ns, xmp))
			}
		}
		return pruneObject(xmp)
	}

	/**
	 * Copies the result of parse() into the output object `root`.
	 */
	assignToOutput(root, xmp) {
		if (!this.localOptions.parse) {
			// xmp is not parsed, we include the string into output as is
			root.xmp = xmp
			return
		}
		// properties are grouped into separate namespace objects
		// XMP TIFF namespace is merged into IFD0 block of TIFF segment
		// XMP EXIF namespace is merged into EXIF block of TIFF segment
		// All other namespaces are assigned under their own prefix
		for (let [ns, nsObject] of Object.entries(xmp)) {
			switch (ns) {
				case 'tiff':
					this.assignObjectToOutput(root, 'ifd0', nsObject)
					break
				case 'exif':
					this.assignObjectToOutput(root, 'exif', nsObject)
					break
				case 'xmlns':
					// XMLNS attributes aren't links but namespace identifiers in the URI form.
					// They carry no useful information for the output, so they are skipped.
					break
				default:
					this.assignObjectToOutput(root, ns, nsObject)
					break
			}
		}
	}

}

// removes undefined properties and empty objects (mutates and returns the object,
// or returns whatever undefinedIfEmpty() yields for it)
function pruneObject(object) {
	for (let key of Object.keys(object)) {
		let val = object[key] = undefinedIfEmpty(object[key])
		if (val === undefined) delete object[key]
	}
	return undefinedIfEmpty(object)
}

segmentParsers.set('xmp', XmpParser)


// ----- ATTRIBUTES -----

/**
 * A single namespaced XML attribute: ns:name="value".
 */
export class XmlAttr {

	// Finds all ns:name="value" / ns:name='value' attributes in the string.
	static findAll(string) {
		// NOTE: regex has to be recreated each time because it's stateful due to use in exec()
		let regex = /([a-zA-Z0-9-]+):([a-zA-Z0-9-]+)=("[^"]*"|'[^']*')/gm
		return matchAll(string, regex).map(XmlAttr.unpackMatch)
	}

	// Turns a regex match produced by findAll() into an XmlAttr instance.
	static unpackMatch(match) {
		let [, ns, name, quoted] = match
		// strip the surrounding quotes before normalizing
		let value = normalizeValue(quoted.slice(1, -1))
		return new XmlAttr(ns, name, value)
	}

	constructor(ns, name, value) {
		this.ns = ns
		this.name = name
		this.value = value
	}

	serialize() {
		return this.value
	}

}


// ----- TAGS -----

const tagNamePartRegex = '[\\w\\d-]+'
const VALUE_PROP = 'value'

/**
 * A single namespaced XML element with its attributes and child elements,
 * both parsed eagerly in the constructor.
 */
export class XmlTag {

	/**
	 * Finds all ns:name elements in the string. An omitted `ns` or `name`
	 * matches any prefix / local name.
	 * NOTE(review): `ns` and `name` are inserted into the pattern unescaped,
	 * so they must only ever come from trusted callers.
	 */
	static findAll(xmpString, ns, name) {
		// NOTE: regex has to be recreated each time because it's stateful due to use in exec()
		// handles both pair and self-closing tags.
		// (#\d+)? is the nesting id that idNestedTags() adds to rdf list tags.
		let nsPattern = ns || tagNamePartRegex
		let namePattern = name || tagNamePartRegex
		let regex = new RegExp(`<(${nsPattern}):(${namePattern})(#\\d+)?((\\s+?[\\w\\d-:]+=("[^"]*"|'[^']*'))*\\s*)(\\/>|>([\\s\\S]*?)<\\/\\1:\\2\\3>)`, 'gm')
		return matchAll(xmpString, regex).map(XmlTag.unpackMatch)
	}

	// Turns a regex match produced by findAll() into an XmlTag instance.
	static unpackMatch(match) {
		let ns = match[1]
		let name = match[2]
		let attrString = match[4]
		let innerXml = match[8]
		return new XmlTag(ns, name, attrString, innerXml)
	}

	constructor(ns, name, attrString, innerXml) {
		this.ns = ns
		this.name = name
		this.attrString = attrString
		this.innerXml = innerXml
		this.attrs = XmlAttr.findAll(attrString)
		this.children = XmlTag.findAll(innerXml)
		// only a tag without child tags can carry a text value
		this.value = this.children.length === 0 ? normalizeValue(innerXml) : undefined
		this.properties = [...this.attrs, ...this.children]
	}

	get isPrimitive() {
		return this.value !== undefined
			&& this.attrs.length === 0
			&& this.children.length === 0
	}

	get isListContainer() {
		return this.children.length === 1
			&& this.children[0].isList
	}

	get isList() {
		let {ns, name} = this
		return ns === 'rdf'
			&& (name === 'Seq' || name === 'Bag' || name === 'Alt')
	}

	get isListItem() {
		return this.ns === 'rdf' && this.name === 'li'
	}

	/**
	 * Converts the tag into a plain value: primitive, array, object or undefined.
	 */
	serialize() {
		// invalid and undefined
		if (this.properties.length === 0 && this.value === undefined)
			return undefined
		// primitive property
		if (this.isPrimitive)
			return this.value
		// tag containing list tag <ns:tag><rdf:Seq>...</rdf:Seq></ns:tag>
		if (this.isListContainer)
			return this.children[0].serialize()
		// list tag itself <rdf:Seq>...</rdf:Seq>
		if (this.isList)
			return unwrapArray(this.children.map(serialize))
		// sometimes <rdf:li> may have a single object-tag child. We need that object returned.
		if (this.isListItem && this.children.length === 1 && this.attrs.length === 0)
			return this.children[0].serialize()
		// process attributes and children tags into object
		let output = {}
		for (let prop of this.properties)
			assignToObject(prop, output)
		if (this.value !== undefined)
			output[VALUE_PROP] = this.value
		return undefinedIfEmpty(output)
	}

}


// ----- UTILS -----

function hasOwn(object, key) {
	return Object.prototype.hasOwnProperty.call(object, key)
}

// Sets target[key] as an own property. A plain assignment to '__proto__' would
// re-parent the object instead of storing the value, and XMP comes from untrusted files.
function setOwn(target, key, value) {
	if (key === '__proto__')
		Object.defineProperty(target, key, {value, writable: true, enumerable: true, configurable: true})
	else
		target[key] = value
}

// Stores the serialized property under its name, unless it serializes to undefined.
function assignToObject(prop, target) {
	let serialized = prop.serialize()
	if (serialized !== undefined) setOwn(target, prop.name, serialized)
}

function serialize(prop) {
	return prop.serialize()
}

function unwrapArray(array) {
	return array.length === 1 ? array[0] : array
}

// Returns the object collecting properties of the given namespace, creating it on first use.
// Only own properties count: a prefix such as 'constructor' or 'toString' must not resolve to
// a member inherited from Object.prototype, otherwise properties would be written onto globals.
function getNamespace(ns, root) {
	if (!hasOwn(root, ns)) setOwn(root, ns, {})
	return root[ns]
}

// Collects every match of a global regex. Returns an empty array for an empty/missing string.
function matchAll(string, regex) {
	let matches = []
	if (!string) return matches
	let match
	while ((match = regex.exec(string)) !== null)
		matches.push(match)
	return matches
}

/**
 * Converts a raw attribute/text value into number, boolean, trimmed string,
 * or undefined for empty-like input (see isUndefinable).
 * Surrounding whitespace is ignored for all of the conversions.
 */
export function normalizeValue(value) {
	if (isUndefinable(value)) return undefined
	let trimmed = value.trim()
	let num = Number(trimmed)
	if (!Number.isNaN(num)) return num
	let lowercase = trimmed.toLowerCase()
	if (lowercase === 'true') return true
	if (lowercase === 'false') return false
	return trimmed
}

function isUndefinable(value) {
	return value === null
		|| value === undefined
		|| value === 'null'
		|| value === 'undefined'
		|| value === ''
		|| value.trim() === ''
}

const nestedLiRegex = /(<|\/)(rdf:li|rdf:Seq|rdf:Bag|rdf:Alt)/g

/**
 * Appends a nesting id (#1, #2, ...) to every opening and closing rdf:li, rdf:Seq,
 * rdf:Bag and rdf:Alt tag, so that XmlTag.findAll() can pair an opening tag with
 * its own closing tag even when tags of the same name are nested.
 */
export function idNestedTags(xmpString) {
	let counts = {
		'rdf:li': 1,
		'rdf:Seq': 1,
		'rdf:Bag': 1,
		'rdf:Alt': 1,
	}
	return xmpString.replace(nestedLiRegex, (match, prevChar, tag) => {
		// opening tag takes the current depth and goes one level deeper,
		// closing tag steps back out and reuses the id of its opening tag
		if (prevChar === '<')
			return `${match}#${counts[tag]++}`
		else
			return `${match}#${--counts[tag]}`
	})
}