Brett Zamir on 2010-04-11 23:30:40
When I said just now that my own patching code does not validate, I did not mean that it is invalid code, but merely that it does not check the validity of the diff supplied to it.
Brett Zamir on 2010-04-11 23:29:50
Hi, my own patching code does not validate. If you want some code that does, the following may work. It is based on code by Imgen Tata, though I have reformatted it to work as self-contained functions in the spirit of php.js (if combined, the two functions could replace xdiff_string_patch):
/**
 * Patches original text with a unified diff to generate the new text.
 *
 * Only the hunk bookkeeping is validated: hunk headers must be well formed,
 * hunks must be ordered and must not overlap, and the line counts announced
 * in each header must match the number of context/deletion/addition lines
 * that follow it. The content of context and deletion lines is NOT compared
 * against the original text.
 *
 * Accepted hunk headers are "@@ -start[,count] +start[,count] @@"; an omitted
 * count means 1, and a range with count 0 is addressed by the line that
 * precedes it, as in the unified diff format. "--- " and "+++ " file header
 * lines that appear before the first hunk are ignored. Empty patch lines are
 * skipped.
 *
 * @author Imgen Tata (http://www.myipdf.com/)
 * @see http://en.wikipedia.org/wiki/Diff#Unified_format
 * @param {String} ori_text The original text
 * @param {String} unidiff_patch The patch in unidiff format. Will be validated.
 * @returns {String} The generated new text
 * @throws {Error} 'Bad type' when either argument is not a string
 * @throws {String} 'invalid patch header' when a hunk header is malformed,
 *         out of order/range, or disagrees with the hunk body
 * @throws {String} 'Unrecognized initial character in unidiff line' when a
 *         patch line starts with anything other than ' ', '-' or '+'
 */
function string_unidiff_patch (ori_text, unidiff_patch) {
    var HEADER_PREFIX = '@@ ',
        OLD_FILE_PREFIX = '--- ',
        NEW_FILE_PREFIX = '+++ ',
        CONTEXT_INDICATOR = ' ',
        DELETION_INDICATOR = '-',
        ADDITION_INDICATOR = '+',
        NEW_LINE = '\n',
        INVALID_HEADER = 'invalid patch header',
        // "@@ -start[,count] +start[,count] @@", anchored at the line start.
        // Captures: 1 = original start, 2 = original count (optional),
        //           3 = new start,      4 = new count (optional).
        range_extractor = /^@@ \s*-\s*(\d+)(?:,\s*(\d+))?\s*\+(\d+)(?:,\s*(\d+))?\s* @@/,
        ori_lines,
        ori_len,
        patch_lines,
        patch_line,
        new_lines = [],
        line_index = 0,       // next unread line of the original text
        last_line_index = 0,
        ori_hunk_start = 0,
        ori_hunk_size = 0,
        new_hunk_start = 0,
        new_hunk_size = 0,
        context_size = 0,
        deletion_size = 0,
        addition_size = 0,
        expected_new_start,
        hunk_seen = false,
        ranges,
        i, j,
        /*
        * Rejects non-string arguments
        */
        require_string = function (value) {
            if (typeof value !== 'string') {
                throw new Error('Bad type');
            }
        },
        /*
        * Splits text into lines; empty text has no lines
        */
        split_into_lines = function (text) {
            return text === '' ? [] : text.split(NEW_LINE);
        },
        /*
        * Tells whether text begins with prefix
        */
        starts_with = function (text, prefix) {
            return text.lastIndexOf(prefix, 0) === 0;
        },
        /*
        * Converts a captured hunk count to a number; an omitted count means 1
        */
        parse_count = function (captured) {
            return captured ? parseInt(captured, 10) : 1;
        },
        /*
        * Checks the sizes announced by a hunk header against the lines seen
        */
        validate_hunk_sizes = function (context_count, ori_size, deletion_count,
                                        new_size, addition_count) {
            if (ori_size !== context_count + deletion_count ||
                    new_size !== context_count + addition_count) {
                throw INVALID_HEADER;
            }
        };

    require_string(ori_text);
    require_string(unidiff_patch);

    ori_lines = split_into_lines(ori_text);
    ori_len = ori_lines.length;
    patch_lines = split_into_lines(unidiff_patch);

    for (i = 0; i < patch_lines.length; i++) {
        patch_line = patch_lines[i];
        if (patch_line === '') {//Skip empty line
            continue;
        }

        // Only a line that STARTS with the prefix is a hunk header; a content
        // line such as "+foo @@ bar" must be treated as an ordinary line.
        if (starts_with(patch_line, HEADER_PREFIX)) {
            //Validate the previous hunk against its header
            validate_hunk_sizes(context_size,
                            ori_hunk_size, deletion_size,
                            new_hunk_size, addition_size);

            ranges = patch_line.match(range_extractor);
            if (ranges === null) {
                throw INVALID_HEADER;
            }

            ori_hunk_start = parseInt(ranges[1], 10);
            ori_hunk_size = parse_count(ranges[2]);
            new_hunk_start = parseInt(ranges[3], 10);
            new_hunk_size = parse_count(ranges[4]);
            hunk_seen = true;

            last_line_index = line_index;
            // An empty range is addressed by the line preceding it, so the
            // hunk begins right after that line; otherwise start is 1-based.
            line_index = ori_hunk_size === 0 ?
                            ori_hunk_start :
                            Math.max(ori_hunk_start - 1, 0);

            if (ori_len > 0) {
                //Hunks must be ordered and must lie inside the original text
                if (last_line_index > line_index || line_index > ori_len) {
                    throw INVALID_HEADER;
                }

                //Copy the untouched lines between the previous hunk and this one
                for (j = last_line_index; j < line_index; j++) {
                    new_lines.push(ori_lines[j]);
                }
            }

            //Validate new hunk start index (same empty-range convention)
            expected_new_start = new_hunk_size === 0 ?
                            new_lines.length :
                            new_lines.length + 1;
            if (new_hunk_start > 0 && new_hunk_start !== expected_new_start) {
                throw INVALID_HEADER;
            }

            //Reset sizes
            context_size = 0;
            deletion_size = 0;
            addition_size = 0;

            continue;
        }

        // File header lines ("--- a/file", "+++ b/file") precede the first
        // hunk and carry no content; inside a hunk they are ordinary lines.
        if (!hunk_seen && (starts_with(patch_line, OLD_FILE_PREFIX) ||
                starts_with(patch_line, NEW_FILE_PREFIX))) {
            continue;
        }

        switch (patch_line.charAt(0)) {
            case CONTEXT_INDICATOR:
                //Context is taken from the original text, not from the patch
                new_lines.push(ori_lines[line_index]);
                line_index++;
                context_size++;
                break;

            case DELETION_INDICATOR:
                line_index++;
                deletion_size++;
                break;

            case ADDITION_INDICATOR:
                new_lines.push(patch_line.substr(1));
                addition_size++;
                break;

            default:
                throw 'Unrecognized initial character in unidiff line';
        }
    }

    //Validate the last hunk against its header
    validate_hunk_sizes(context_size,
                    ori_hunk_size, deletion_size,
                    new_hunk_size, addition_size);

    //Append the remaining lines
    while (line_index < ori_len) {
        new_lines.push(ori_lines[line_index]);
        line_index++;
    }

    return new_lines.join(NEW_LINE);
}

/**
 * Reverse-applies a unified diff to the new text to regenerate the original text.
 *
 * Only the hunk bookkeeping is validated: hunk headers must be well formed,
 * hunks must be ordered and must not overlap, and the line counts announced
 * in each header must match the number of context/deletion/addition lines
 * that follow it. The content of context and addition lines is NOT compared
 * against the new text.
 *
 * Accepted hunk headers are "@@ -start[,count] +start[,count] @@"; an omitted
 * count means 1, and a range with count 0 is addressed by the line that
 * precedes it, as in the unified diff format. "--- " and "+++ " file header
 * lines that appear before the first hunk are ignored. Empty patch lines are
 * skipped.
 *
 * @author Imgen Tata (http://www.myipdf.com/)
 * @see http://en.wikipedia.org/wiki/Diff#Unified_format
 * @param {String} new_text The new text
 * @param {String} unidiff_patch The patch in unidiff format. Will be validated.
 * @returns {String} The generated original text
 * @throws {Error} 'Bad type' when either argument is not a string
 * @throws {String} 'invalid patch header' when a hunk header is malformed,
 *         out of order/range, or disagrees with the hunk body
 * @throws {String} 'Unrecognized initial character in unidiff line' when a
 *         patch line starts with anything other than ' ', '-' or '+'
 */
function string_unidiff_reverse_patch (new_text, unidiff_patch) {
    var HEADER_PREFIX = '@@ ',
        OLD_FILE_PREFIX = '--- ',
        NEW_FILE_PREFIX = '+++ ',
        CONTEXT_INDICATOR = ' ',
        DELETION_INDICATOR = '-',
        ADDITION_INDICATOR = '+',
        NEW_LINE = '\n',
        INVALID_HEADER = 'invalid patch header',
        // "@@ -start[,count] +start[,count] @@", anchored at the line start.
        // Captures: 1 = original start, 2 = original count (optional),
        //           3 = new start,      4 = new count (optional).
        range_extractor = /^@@ \s*-\s*(\d+)(?:,\s*(\d+))?\s*\+(\d+)(?:,\s*(\d+))?\s* @@/,
        new_lines,
        new_len,
        patch_lines,
        patch_line,
        ori_lines = [],
        line_index = 0,       // next unread line of the new text
        last_line_index = 0,
        ori_hunk_start = 0,
        ori_hunk_size = 0,
        new_hunk_start = 0,
        new_hunk_size = 0,
        context_size = 0,
        deletion_size = 0,
        addition_size = 0,
        expected_ori_start,
        hunk_seen = false,
        ranges,
        i, j,
        /*
        * Rejects non-string arguments
        */
        require_string = function (value) {
            if (typeof value !== 'string') {
                throw new Error('Bad type');
            }
        },
        /*
        * Splits text into lines; empty text has no lines
        */
        split_into_lines = function (text) {
            return text === '' ? [] : text.split(NEW_LINE);
        },
        /*
        * Tells whether text begins with prefix
        */
        starts_with = function (text, prefix) {
            return text.lastIndexOf(prefix, 0) === 0;
        },
        /*
        * Converts a captured hunk count to a number; an omitted count means 1
        */
        parse_count = function (captured) {
            return captured ? parseInt(captured, 10) : 1;
        },
        /*
        * Checks the sizes announced by a hunk header against the lines seen
        */
        validate_hunk_sizes = function (context_count, ori_size, deletion_count,
                                        new_size, addition_count) {
            if (ori_size !== context_count + deletion_count ||
                    new_size !== context_count + addition_count) {
                throw INVALID_HEADER;
            }
        };

    require_string(new_text);
    require_string(unidiff_patch);

    new_lines = split_into_lines(new_text);
    new_len = new_lines.length;
    patch_lines = split_into_lines(unidiff_patch);

    for (i = 0; i < patch_lines.length; i++) {
        patch_line = patch_lines[i];
        if (patch_line === '') {//Skip empty line
            continue;
        }

        // Only a line that STARTS with the prefix is a hunk header; a content
        // line such as "-foo @@ bar" must be treated as an ordinary line.
        if (starts_with(patch_line, HEADER_PREFIX)) {
            //Validate the previous hunk against its header
            validate_hunk_sizes(context_size,
                            ori_hunk_size, deletion_size,
                            new_hunk_size, addition_size);

            ranges = patch_line.match(range_extractor);
            if (ranges === null) {
                throw INVALID_HEADER;
            }

            ori_hunk_start = parseInt(ranges[1], 10);
            ori_hunk_size = parse_count(ranges[2]);
            new_hunk_start = parseInt(ranges[3], 10);
            new_hunk_size = parse_count(ranges[4]);
            hunk_seen = true;

            last_line_index = line_index;
            // An empty range is addressed by the line preceding it, so the
            // hunk begins right after that line; otherwise start is 1-based.
            line_index = new_hunk_size === 0 ?
                            new_hunk_start :
                            Math.max(new_hunk_start - 1, 0);

            if (new_len > 0) {
                //Hunks must be ordered and must lie inside the new text
                if (last_line_index > line_index || line_index > new_len) {
                    throw INVALID_HEADER;
                }

                //Copy the untouched lines between the previous hunk and this one
                for (j = last_line_index; j < line_index; j++) {
                    ori_lines.push(new_lines[j]);
                }
            }

            //Validate original hunk start index (same empty-range convention)
            expected_ori_start = ori_hunk_size === 0 ?
                            ori_lines.length :
                            ori_lines.length + 1;
            if (ori_hunk_start > 0 && ori_hunk_start !== expected_ori_start) {
                throw INVALID_HEADER;
            }

            //Reset sizes
            context_size = 0;
            deletion_size = 0;
            addition_size = 0;

            continue;
        }

        // File header lines ("--- a/file", "+++ b/file") precede the first
        // hunk and carry no content; inside a hunk they are ordinary lines.
        if (!hunk_seen && (starts_with(patch_line, OLD_FILE_PREFIX) ||
                starts_with(patch_line, NEW_FILE_PREFIX))) {
            continue;
        }

        switch (patch_line.charAt(0)) {
            case CONTEXT_INDICATOR:
                //Context is taken from the new text, not from the patch
                ori_lines.push(new_lines[line_index]);
                line_index++;
                context_size++;
                break;

            case DELETION_INDICATOR:
                //Put deleted line back
                ori_lines.push(patch_line.substr(1));
                deletion_size++;
                break;

            case ADDITION_INDICATOR:
                //Added lines exist only in the new text; skip over them
                line_index++;
                addition_size++;
                break;

            default:
                throw 'Unrecognized initial character in unidiff line';
        }
    }

    //Validate the last hunk against its header
    validate_hunk_sizes(context_size,
                    ori_hunk_size, deletion_size,
                    new_hunk_size, addition_size);

    //Append the remaining lines
    while (line_index < new_len) {
        ori_lines.push(new_lines[line_index]);
        line_index++;
    }

    return ori_lines.join(NEW_LINE);
}