User:Tardis/regdiff.py

This is an old revision of this page, as edited by Tardis (talk | contribs) at 10:56, 9 November 2008 (program to compare Windows registry dumps). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
#!/usr/bin/env python
#regdiff.py
#Created November 9 2008
#Updated November 9 2008
#Version 0.1

#This program is free software; you can redistribute it and/or modify it under
#the terms of the GNU General Public License version 2, the GNU Free
#Documentation License version 1.2, or (at your option) any later version of
#either license.

#Compares two Windows registry (.REG) files and produces a third such that
#applying it transforms a registry represented by the first into a registry
#represented by the second.  (That is, it deletes all keys and values present
#in the first but not in the second!)

#BUGS:
#Assumes that no key contains assignments to @ and to "" -- I think the latter
#is invalid anyway.  Also changes any literal \r\n in strings to \n.

import sys,codecs

class line(object):
    """One logical line of a .REG file: a key header, a value, or EOF.

    Attributes:
      old     -- set by the caller once the line has been consumed and a
                 replacement should be read; always starts out False
      str     -- the raw text the object was built from (None at EOF)
      eof     -- True when the object stands for end of file
      iskey   -- True for a "[key]" or "[-key]" header line
      name    -- the key path for a key; the still-escaped value name for a
                 value ("" for the default value @); None at EOF
      delete  -- True for "[-key]" and for "name=-"
      lastkey -- a key's own name; for a value, the key inherited from k
    """

    def __init__(self,s,k=None):
        """Parse s and make a line object.

        s is a logical line, or None to represent EOF.  k is the previously
        parsed line; a value inherits k.lastkey (None when k is None), while
        a key ignores k.  Names are not unescaped, but escaping is honoured
        when finding where a quoted name ends.

        Raises IOError if s is neither a key nor a well-formed assignment.
        """
        self.old = False
        self.str = s
        self.eof = s is None
        self.iskey = not self.eof and len(s) > 1 and s[0] == '['

        if self.eof:
            self.name = None
            self.delete = False
            self.lastkey = None
            return

        if self.iskey:
            self.delete = s[1] == '-'
            # Drop the line terminator (if any) and then the closing bracket,
            # rather than assuming the line ends in exactly "]\n": the last
            # line of a file may have no newline at all.
            body = s.rstrip('\r\n')
            if body.endswith(']'):
                body = body[:-1]
            self.lastkey = self.name = body[1 + self.delete:]
            return

        index = 1               # points past end of name
        first = s[:1]           # slicing keeps "" from raising IndexError
        if first == '"':
            escaped = False
            for c in s[1:]:
                index += 1
                if escaped:
                    escaped = False
                elif c == '\\':
                    escaped = True
                elif c == '"':
                    break
            else:
                raise IOError("unterminated name in " + repr(s))
        elif first != '@':      # @ is the key's default value
            raise IOError("unrecognized format: " + repr(s))

        # The name for @ is "", which properly sorts before everything.
        self.name = s[1:index - 1]
        assign = s[index:].lstrip()
        if len(assign) < 2 or assign[0] != '=':
            raise IOError("no assignment in " + repr(s))
        self.delete = assign[1] == '-'
        self.lastkey = None if k is None else k.lastkey

    def valname(self):
        """Return the original form of this value's name.

        Raises ValueError when called on a key line.
        """
        if self.iskey:
            raise ValueError("this is not a value")
        return '"' + self.name + '"' if self.name else '@'

    def __str__(self):
        """Return a debugging description of the line."""
        if self.eof:
            what = "EOF"
        elif self.iskey:
            what = repr(self.str) + " (key)"
        else:
            what = repr(self.str) + " in key " + repr(self.lastkey)
        return self.__class__.__name__ + '[' + what + ": " + repr(self.name) + ']'

class keyprint(object):
    """Callable that writes a "[key]" header only when the key changes."""

    def __init__(self, o):
        """Remember the output stream o; no key has been written yet."""
        self.out = o
        self.key = None

    def __call__(self, k):
        """Write a blank line and the header for k unless k is already current."""
        if k == self.key:
            return
        self.key = k
        header = "\n[" + k + "]\n"
        self.out.write(header)

def terminated(s):
    """Tell whether s is a complete logical line.

    The result is true when every unescaped double quote in s is paired and
    the final character is not an unescaped backslash.  A backslash escapes
    exactly the one character that follows it.
    """
    escaped = False
    balanced = True
    for ch in s:
        if escaped:
            # This character is protected by the preceding backslash.
            escaped = False
            continue
        if ch == '\\':
            escaped = True
        elif ch == '"':
            balanced = not balanced
    return balanced and not escaped

def nextLogical(f):
    """Return the next logical line from a file object.

    Physical lines are joined until terminated() accepts the accumulated text,
    i.e. until no string is left open and no backslash is left pending.  Line
    endings are normalised to '\n' and blank lines before the logical line are
    skipped.

    Never returns a null string; the result always ends in '\n', even when the
    file's last line has no line terminator.
    Return None at EOF.
    Raises IOError if the file ends in the middle of a logical line.
    """
    ret = ""
    while True:
        l = f.readline()
        if l == "":
            if ret == "":
                return None
            raise IOError("file ends with escape or in string")
        c = l.rstrip('\r\n')
        if c == "" and ret == "":
            continue            # skip initial blank lines
        ret += c
        if terminated(ret):
            # Always terminate the result so that a final line lacking a
            # newline looks the same as any other line to the caller.
            return ret + '\n'
        # Keep the line break inside an unfinished logical line, but only if
        # the physical line actually had one.
        if c != l:
            ret += '\n'

def isunder(s, r):
    """Return True if the key s is r itself or lies in the tree rooted at r."""
    if s == r:
        return True
    # A proper descendant is r followed by a path separator and more text.
    subtree_prefix = r + '\\'
    return s.startswith(subtree_prefix)

# Exactly two operands are expected: the old dump and the new dump.
if len(sys.argv) != 3:
    sys.stderr.write("usage: " + sys.argv[0] + " old.reg new.reg\n")
    sys.exit(2)                 # BAD_ARGS

# Both inputs are decoded from, and the result is encoded to, UTF-16.
reader = codecs.getreader("utf_16")
fo = reader(open(sys.argv[1], 'rb'))
fn = reader(open(sys.argv[2], 'rb'))
out = codecs.getwriter("utf_16")(sys.stdout)
kp = keyprint(out)

# The first line of each file is its header; the two must agree, and the
# shared header is copied to the output with a bare '\n' line ending.
head = fo.readline()
if head != fn.readline():
    raise IOError("different file headers")
out.write(head.rstrip('\r\n') + '\n')

# Walk the old file (o) and the new file (n) in step, like a merge of two
# sorted sequences, writing the deletions and assignments that turn the old
# registry into the new one.
#
# o and n start out as one shared EOF placeholder flagged old, so the first
# pass through the loop reads a real line from each file.
o=n=line(None)
o.old=True
killing=False                   # the tree being deleted, if any
while True:
    # Replace whichever line was consumed on the previous pass; the previous
    # line is passed along so that a value inherits its key.
    if o.old: o=line(nextLogical(fo),o)
    if n.old: n=line(nextLogical(fn),n)

    if o.eof and n.eof: break
    if o.delete or n.delete: raise IOError,"input contains deletion requests"

    # Determine which line logically comes first; all keys come after all
    # values (since the values go with a previous key), and EOF comes after
    # everything.  Positive values mean that n comes first.
    # Keys and names are compared as raw strings with cmp().
    # NOTE(review): a sibling such as "Foo2" therefore sorts before the
    # subkey "Foo\Bar" ('2' < '\\'); confirm that both inputs really list
    # their keys in this order.
    c=o.eof-n.eof or cmp(o.lastkey,n.lastkey) or o.iskey-n.iskey or cmp(o.name,n.name)
    # Consume the line that comes first; on a tie (c==0) consume both.
    o.old=c<=0
    n.old=c>=0

    # Stop suppressing output once the old file has left the deleted tree.
    if killing and (o.eof or not isunder(o.lastkey,killing)): killing=False

    # While a tree is being deleted nothing is written at all.
    if not killing:
        if c<0:
            # The old line comes first, i.e. it has no counterpart in the new
            # file at this position.
            if o.iskey:
                # Delete a whole key if the new file is past all its subkeys.
                # Note that n.lastkey!=o.name, because n must be a key.
                if n.eof or not isunder(n.lastkey,o.name):
                    killing=o.name
                    out.write("\n[-"+o.name+"]\n")
            else:
                # A value present only in the old file: remove it.
                kp(o.lastkey)
                out.write(o.valname()+"=-\n")
        elif not n.iskey and n.str!=o.str:
            # A value whose text differs from the current old line: write the
            # new line verbatim.  Key lines from the new file write nothing
            # here; kp emits a key header when one of its values is written.
            kp(n.lastkey)
            out.write(n.str)

# Finish the output with a newline.
out.write('\n')