[Python-Dev] Fwd: Distributed RCS

Mon Aug 15 00:07:39 CEST 2005

On Sun, 2005-08-14 at 11:12 -0600, Neil Schemenauer wrote:
> On Sun, Aug 14, 2005 at 06:16:11PM +0200, "Martin v. Löwis" wrote:
> > It depends on what "a bit" is. Waiting a month would be fine; waiting
> > two years might be pointless.
> 
> It looks like the process of converting a CVS repository to
> Bazaar-NG does not yet work well (to be kind).  The path
> CVS->SVN->bzr would probably work better.  I suspect cvs2svn has
> been used on quite a few CVS repositories already.  I don't think
> going to SVN first would lose any information.

It doesn't.

As a data point, CVS2SVN can handle gcc's massive cvs repository, which
has merged rcs file information in it dating back to 1987, >1000 tags,
and > 300 branches.

Besides monotone's cvs_import, it's actually the only properly designed
cvs converter I've seen in a while (properly designed in that it
actually uses the necessary and correct algorithms to get all the
oddities of cvs branches and tags right).

I'm not sure how big Python's repo is, but you probably want to use the
attached patch to speed up cvs2svn.  It changes cvs2svn to reconstruct
the revisions on its own instead of calling cvs or rcs.  For GCC and KDE,
this makes a significant difference (17 hours for our 4 gig cvs repo
conversion instead of 52 hours), because it was spawning cvs/rcs 50
billion times, and the milliseconds add up :)


> My vote is to continue with the migration to SVN.  We can
> re-evaluate Bazaar-NG at a later time.
GCC is moving to SVN (very soon now, within 2 months), and this has been
my viewpoint as well.

It's much easier to go from something that has changesets and global
revisions, to a distributed system, if you want to, than it is to try to
reconstruct that info from CVS on your own :).

Subversion also has excellent language bindings, including the Python
bindings.  That's how I've hooked it up to gcc's bugzilla.  You could
easily write something to transform *from* subversion to another system
using the bindings.

Things like viewcvs use the Python bindings to deal with the svn
repository entirely.

--Dan

-------------- next part --------------
Index: cvs2svn
===================================================================

--- cvs2svn	(revision 1423)
+++ cvs2svn	(working copy)
@@ -166,6 +166,10 @@
 # grouping.  See design-notes.txt for details.
 DATAFILE = 'cvs2svn-data'
 
+REVISIONS_DB = 'cvs2svn-cvsrepo.db'
+
+CHECKOUT_DB = 'cvs2svn-cvsco.db'
+
 # This file contains a marshalled copy of all the statistics that we
 # gather throughout the various runs of cvs2svn.  The data stored as a
 # marshalled dictionary.
@@ -355,40 +359,7 @@
                    " cvsroot\n" % (error_prefix, cvsroot, fname))
   sys.exit(1)
 
-def get_co_pipe(c_rev, extra_arguments=None):
-  """Return a command string, and the pipe created using that string.
-  C_REV is a CVSRevision, and EXTRA_ARGUMENTS is used to add extra
-  arguments.  The pipe returns the text of that CVS Revision."""
-  ctx = Ctx()
-  if extra_arguments is None:
-    extra_arguments = []
-  if ctx.use_cvs:
-    pipe_cmd = [ 'cvs' ] + ctx.cvs_global_arguments + \
-               [ 'co', '-r' + c_rev.rev, '-p' ] + extra_arguments + \
-               [ ctx.cvs_module + c_rev.cvs_path ];
-  else:
-    pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ] + extra_arguments + \
-               [ c_rev.rcs_path() ]
-  pipe = SimplePopen(pipe_cmd, True)
-  pipe.stdin.close()
-  return pipe_cmd, pipe
-
-def generate_ignores(c_rev):
-  # Read in props
-  pipe_cmd, pipe = get_co_pipe(c_rev)
-  buf = pipe.stdout.read(PIPE_READ_SIZE)
-  raw_ignore_val = ""
-  while buf:
-    raw_ignore_val = raw_ignore_val + buf
-    buf = pipe.stdout.read(PIPE_READ_SIZE)
-  pipe.stdout.close()
-  error_output = pipe.stderr.read()
-  exit_status = pipe.wait()
-  if exit_status:
-    sys.exit("%s: The command '%s' failed with exit status: %s\n"
-             "and the following output:\n"
-             "%s" % (error_prefix, pipe_cmd, exit_status, error_output))
-
+def generate_ignores(raw_ignore_val):
   # Tweak props: First, convert any spaces to newlines...
   raw_ignore_val = '\n'.join(raw_ignore_val.split())
   raw_ignores = raw_ignore_val.split('\n')
@@ -614,9 +585,7 @@
 DB_OPEN_READ = 'r'
 DB_OPEN_NEW = 'n'
 
-# A wrapper for anydbm that uses the marshal module to store items as
-# strings.
-class Database:
+class SDatabase:
   def __init__(self, filename, mode):
     # pybsddb3 has a bug which prevents it from working with
     # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
@@ -635,22 +604,24 @@
 
     self.db = anydbm.open(filename, mode)
 
-  def has_key(self, key):
-    return self.db.has_key(key)
+  def __getattr__(self, name):
+    return getattr(self.db, name)
 
+# A wrapper for anydbm that uses the marshal module to store items as
+# strings.
+class Database(SDatabase):
+
   def __getitem__(self, key):
     return marshal.loads(self.db[key])
 
   def __setitem__(self, key, value):
     self.db[key] = marshal.dumps(value)
 
-  def __delitem__(self, key):
-    del self.db[key]
-
   def get(self, key, default):
-    if self.has_key(key):
-      return self.__getitem__(key)
-    return default
+    try:
+      return marshal.loads(self.db[key])
+    except KeyError:
+      return default
 
 
 class StatsKeeper:
@@ -841,6 +812,192 @@
     Cleanup().register(temp(TAGS_DB), pass8)
 
 
+def msplit(stri):
+  re = [ i + "\n" for i in stri.split("\n") ]
+  re[-1] = re[-1][:-1]
+  if not re[-1]:
+    del re[-1]
+  return re
+
+
+class RCSStream:
+  ad_command = re.compile('^([ad])(\d+)\\s(\\d+)')
+  a_command = re.compile('^a(\d+)\\s(\\d+)')
+
+  def __init__(self):
+    self.texts = []
+
+  def copy(self):
+    ret = RCSStream()
+    ret.texts = self.texts[:]
+    return ret
+
+  def setText(self, text):
+    self.texts = msplit(text)
+
+  def getText(self):
+    return "".join(self.texts)
+
+  def applyDiff(self, diff):
+    diffs = msplit(diff)
+    adjust = 0
+    i = 0
+    while i < len(diffs):
+      admatch = self.ad_command.match(diffs[i])
+      i += 1
+      try:
+        cn = int(admatch.group(3))
+      except:
+        print diffs
+        raise RuntimeError, 'Error parsing diff commands'
+      if admatch.group(1) == 'd': # "d" - Delete command
+        sl = int(admatch.group(2)) - 1 + adjust
+        del self.texts[sl:sl + cn]
+        adjust -= cn
+      else: # "a" - Add command
+        sl = int(admatch.group(2)) + adjust
+        self.texts[sl:sl] = diffs[i:i + cn]
+        adjust += cn
+        i += cn
+
+  def invertDiff(self, diff):
+    diffs = msplit(diff)
+    ndiffs = []
+    adjust = 0
+    i = 0
+    while i < len(diffs):
+      admatch = self.ad_command.match(diffs[i])
+      i += 1
+      try:
+        cn = int(admatch.group(3))
+      except:
+        raise RuntimeError, 'Error parsing diff commands'
+      if admatch.group(1) == 'd': # "d" - Delete command
+        sl = int(admatch.group(2)) - 1 + adjust
+        # handle substitution explicitly, as add must come after del
+        # (last add may have incomplete line)
+        if i < len(diffs):
+          amatch = self.a_command.match(diffs[i])
+        else:
+          amatch = None
+        if amatch and int(amatch.group(1)) + adjust == sl + cn:
+          cn2 = int(amatch.group(2))
+          i += 1
+          ndiffs += ["d%d %d\na%d %d\n" % (sl + 1, cn2, sl + cn2, cn)] + \
+                    self.texts[sl:sl + cn]
+          self.texts[sl:sl + cn] = diffs[i:i + cn2]
+          adjust += cn2 - cn
+          i += cn2
+        else:
+          ndiffs += ["a%d %d\n" % (sl, cn)] + self.texts[sl:sl + cn]
+          del self.texts[sl:sl + cn]
+          adjust -= cn
+      else: # "a" - Add command
+        sl = int(admatch.group(2)) + adjust
+        ndiffs += ["d%d %d\n" % (sl + 1, cn)]
+        self.texts[sl:sl] = diffs[i:i + cn]
+        adjust += cn
+        i += cn
+    return "".join(ndiffs)
+
+  def zeroDiff(self):
+    if not self.texts:
+      return ""
+    return "a0 " + str(len(self.texts)) + "\n" + "".join(self.texts)
+
+
+class CVSCheckout:
+
+  class Rev: pass
+
+  __shared_state = { }
+  def __init__(self):
+    self.__dict__ = self.__shared_state
+
+  def init(self):
+    self.co_db = SDatabase(temp(CHECKOUT_DB), DB_OPEN_NEW)
+    Cleanup().register(temp(CHECKOUT_DB), pass8)
+    self.rev_db = SDatabase(temp(REVISIONS_DB), DB_OPEN_READ)
+    self.files = { }
+
+  def done(self):
+    print "leftover revisions:"
+    for file in self.files:
+      print file + ':',
+      for r in self.files[file]:
+        print r,
+      print
+    self.co_db.close()
+    self.rev_db.close()
+
+  def init_file(self, fname):
+    revs = { }
+    for line in self.rev_db[fname].split('\n'):
+      prv = None
+      for r in line.split():
+        try:
+          rev = revs[r]
+        except KeyError:
+          rev = CVSCheckout.Rev()
+          rev.ref = 0
+          rev.prev = None
+          revs[r] = rev
+        if prv:
+          revs[prv].prev = r
+          rev.ref += 1
+        prv = r
+    return revs
+
+  def checkout_i(self, fname, revs, r, co, ref):
+    rev = revs[r]
+    if rev.prev:
+      prev = revs[rev.prev]
+      try:
+        key = fname + '/' + rev.prev
+        co.setText(self.co_db[key])
+        prev.ref -= 1
+        if not prev.ref:
+#          print "used saved", fname, rev.prev, "- deleted"
+          del revs[rev.prev]
+          del self.co_db[key]
+#        else:
+#          print "used saved", fname, rev.prev, "- keeping. ref is now", prev.ref
+      except KeyError:
+        self.checkout_i(fname, revs, rev.prev, co, 1)
+    try:
+      co.applyDiff(self.rev_db[fname + '/' + r])
+    except KeyError:
+      pass
+    rev.ref -= ref
+    if rev.ref:
+#      print "checked out", fname, r, "- saving. ref is", rev.ref
+      self.co_db[fname + '/' + r] = co.getText()
+    else:
+#      print "checked out", fname, r, "- not saving"
+      del revs[r]
+
+  def checkout_ii(self, fname, revs, r, cvtnl=None):
+    co = RCSStream()
+    self.checkout_i(fname, revs, r, co, 0)
+    rv = co.getText()
+    if cvtnl:
+      rv = rv.replace('\r\n', '\n').replace('\r', '\n')
+    return rv
+
+  def checkout(self, c_rev, cvtnl=None):
+    try:
+      revs = self.files[c_rev.fname]
+      rv = self.checkout_ii(c_rev.fname, revs, c_rev.rev, cvtnl)
+      if not revs:
+        del self.files[c_rev.fname]
+    except KeyError:
+      revs = self.init_file(c_rev.fname)
+      rv = self.checkout_ii(c_rev.fname, revs, c_rev.rev, cvtnl)
+      if revs:
+        self.files[c_rev.fname] = revs
+    return rv
+
+
 class CVSRevision:
   def __init__(self, ctx, *args):
     """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
@@ -848,7 +1005,6 @@
     If CTX is None, the following members and methods of the
     instantiated CVSRevision class object will be unavailable (or
     simply will not work correctly, if at all):
-       cvs_path
        svn_path
        svn_trunk_path
        is_default_branch_revision()
@@ -870,7 +1026,6 @@
        prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
        rev             -->  (string) this CVS rev, e.g., "1.3"
        next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
-       file_in_attic   -->  (char or None) true if RCS file is in Attic
        file_executable -->  (char or None) true if RCS file has exec bit set. 
        file_size       -->  (int) size of the RCS file
        deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
@@ -883,16 +1038,16 @@
     The two forms of initialization are equivalent."""
 
     self._ctx = ctx
-    if len(args) == 16:
+    if len(args) == 15:
       (self.timestamp, self.digest, self.prev_timestamp, self.op,
-       self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
+       self.prev_rev, self.rev, self.next_rev,
        self.file_executable, self.file_size, self.deltatext_code,
        self.fname, 
        self.mode, self.branch_name, self.tags, self.branches) = args
     elif len(args) == 1:
-      data = args[0].split(' ', 14)
+      data = args[0].split(' ', 13)
       (self.timestamp, self.digest, self.prev_timestamp, self.op,
-       self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
+       self.prev_rev, self.rev, self.next_rev,
        self.file_executable, self.file_size, self.deltatext_code,
        self.mode, self.branch_name, numtags, remainder) = data
       # Patch up data items which are not simple strings
@@ -905,8 +1060,6 @@
         self.prev_rev = None
       if self.next_rev == "*":
         self.next_rev = None
-      if self.file_in_attic == "*":
-        self.file_in_attic = None
       if self.file_executable == "*":
         self.file_executable = None
       self.file_size = int(self.file_size)
@@ -923,12 +1076,11 @@
       self.branches = branches_and_fname[:-1]
       self.fname = branches_and_fname[-1]
     else:
-      raise TypeError, 'CVSRevision() takes 2 or 16 arguments (%d given)' % \
+      raise TypeError, 'CVSRevision() takes 2 or 15 arguments (%d given)' % \
           (len(args) + 1)
-    if ctx is not None:
-      self.cvs_path = relative_name(self._ctx.cvsroot, self.fname[:-2])
-      self.svn_path = self._make_path(self.cvs_path, self.branch_name)
-      self.svn_trunk_path = self._make_path(self.cvs_path)
+    if ctx is not None: # strictly speaking this check is now superfluous
+      self.svn_path = self._make_path(self.fname, self.branch_name)
+      self.svn_trunk_path = self._make_path(self.fname)
 
   # The 'primary key' of a CVS Revision is the revision number + the
   # filename.  To provide a unique key (say, for a dict), we just glom
@@ -941,10 +1093,10 @@
     return revnum + "/" + self.fname
 
   def __str__(self):
-    return ('%08lx %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
+    return ('%08lx %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s' % (
       self.timestamp, self.digest, self.prev_timestamp or "*", self.op,
       (self.prev_rev or "*"), self.rev, (self.next_rev or "*"),
-      (self.file_in_attic or "*"), (self.file_executable or "*"),
+      (self.file_executable or "*"),
       self.file_size,
       self.deltatext_code, (self.mode or "*"), (self.branch_name or "*"),
       len(self.tags), self.tags and " " or "", " ".join(self.tags),
@@ -967,11 +1119,11 @@
     return 0
 
   def is_default_branch_revision(self):
-    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
+    """Return 1 if SELF.rev of SELF.fname is a default branch
     revision according to DEFAULT_BRANCHES_DB (see the conditions
     documented there), else return None."""
-    if self._ctx._default_branches_db.has_key(self.cvs_path):
-      val = self._ctx._default_branches_db[self.cvs_path]
+    if self._ctx._default_branches_db.has_key(self.fname):
+      val = self._ctx._default_branches_db[self.fname]
       val_last_dot = val.rindex(".")
       our_last_dot = self.rev.rindex(".")
       default_branch = val[:val_last_dot]
@@ -1031,19 +1183,6 @@
     else:
       return self._ctx.trunk_base + '/' + path
 
-  def rcs_path(self):
-    """Returns the actual filesystem path to the RCS file of this
-    CVSRevision."""
-    if self.file_in_attic is None:
-      return self.fname
-    else:
-      basepath, filename = os.path.split(self.fname)
-      return os.path.join(basepath, 'Attic', filename)
-
-  def filename(self):
-    "Return the last path component of self.fname, minus the ',v'"
-    return os.path.split(self.fname)[-1][:-2]
-
 class SymbolDatabase:
   """This database records information on all symbols in the RCS
   files.  It is created in pass 1 and it is used in pass 2."""
@@ -1177,6 +1316,8 @@
   def __init__(self):
     self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
     Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
+    self.revisions_db = SDatabase(temp(REVISIONS_DB), DB_OPEN_NEW)
+    Cleanup().register(temp(REVISIONS_DB), pass8)
     self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
     Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
     self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB), DB_OPEN_NEW)
@@ -1211,6 +1352,8 @@
     if not canonical_name == filename:
       self.file_in_attic = 1
 
+    self.stream = RCSStream()
+
     file_stat = os.stat(filename)
     # The size of our file in bytes
     self.file_size = file_stat[stat.ST_SIZE]
@@ -1247,6 +1390,8 @@
     # distinguish between an add and a change.
     self.rev_state = { }
 
+    self.empty_1111 = None
+
     # Hash mapping branch numbers, like '1.7.2', to branch names,
     # like 'Release_1_0_dev'.
     self.branch_names = { }
@@ -1505,6 +1650,10 @@
         # finished the for-loop (no resyncing was performed)
         return
 
+  def writeout(self, r, tx):
+    if tx:
+      self.revisions_db[self.rel_name + '/' + r] = tx
+
   def set_revision_info(self, revision, log, text):
     timestamp, author, old_ts = self.rev_data[revision]
     digest = sha.new(log + '\0' + author).hexdigest()
@@ -1552,13 +1701,15 @@
       deltatext_code = DELTATEXT_NONEMPTY
     else:
       deltatext_code = DELTATEXT_EMPTY
+      if revision == '1.1.1.1':
+        self.empty_1111 = 1
 
     c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp, op,
                         self.prev_rev[revision], revision,
                         self.next_rev.get(revision),
-                        self.file_in_attic, self.file_executable,
+                        self.file_executable,
                         self.file_size,
-                        deltatext_code, self.fname,
+                        deltatext_code, self.rel_name,
                         self.mode, self.rev_to_branch_name(revision),
                         self.taglist.get(revision, []),
                         self.branchlist.get(revision, []))
@@ -1568,6 +1719,16 @@
     if not self.metadata_db.has_key(digest):
       self.metadata_db[digest] = (author, log)
 
+    if trunk_rev.match(revision):
+      if revision not in self.next_rev:
+        self.stream.setText(text)
+      else:
+        self.writeout(self.next_rev[revision], self.stream.invertDiff(text))
+      if not self.prev_rev[revision]:
+        self.writeout(revision, self.stream.zeroDiff())
+    else:
+      self.writeout(revision, text)
+
   def parse_completed(self):
     # Walk through all branches and tags and register them with
     # their parent branch in the symbol database.
@@ -1579,8 +1740,33 @@
 
     self.num_files = self.num_files + 1
 
+    tree = [ ]
+    for r in self.prev_rev:
+      if r not in self.next_rev and not (r == "1.1.1.1" and self.empty_1111):
+        while self.rev_state[r] == 'dead':
+          pr = self.prev_rev[r]
+          if not pr:
+            break
+          if self.next_rev.get(pr) != r:
+            break
+          r = pr
+        else:
+          rvs = [ ]
+          while 1:
+            rvs.append(r)
+            pr = self.prev_rev[r]
+            if not pr:
+              break
+            if self.next_rev.get(pr) != r:
+              rvs.append(pr)
+              break
+            r = pr
+          tree.append(" ".join(rvs))
+    self.revisions_db[self.rel_name] = "\n".join(tree)
+
   def write_symbol_db(self):
     self.symbol_db.write()
+    self.revisions_db.close()
 
 class SymbolingsLogger:
   """Manage the file that contains lines for symbol openings and
@@ -2038,7 +2224,7 @@
         if not c_rev.branches:
           continue
         cvs_generated_msg = ('file %s was initially added on branch %s.\n'
-                             % (c_rev.filename(),
+                             % (os.path.split(c_rev.fname)[-1],
                                 c_rev.branches[0]))
         author, log_msg = \
             Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
@@ -3389,7 +3575,7 @@
     keywords = None
 
     if self.mime_mapper:
-      mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)
+      mime_type = self.mime_mapper.get_type_from_filename(c_rev.fname)
 
     if not c_rev.mode == 'b':
       if not self.no_default_eol:
@@ -3684,10 +3870,12 @@
     if props_len:
       props_header = 'Prop-content-length: %d\n' % props_len
 
+    co = CVSCheckout().checkout(c_rev, s_item.needs_eol_filter)
+
     # treat .cvsignore as a directory property
     dir_path, basename = os.path.split(c_rev.svn_path)
     if basename == ".cvsignore":
-      ignore_vals = generate_ignores(c_rev)
+      ignore_vals = generate_ignores(co)
       ignore_contents = '\n'.join(ignore_vals)
       ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
                          (len(ignore_contents), ignore_contents))
@@ -3705,73 +3893,24 @@
                           % (self._utf8_path(dir_path), ignore_len,
                              ignore_len, ignore_contents))
 
-    # If the file has keywords, we must use -kk to prevent CVS/RCS from
-    # expanding the keywords because they must be unexpanded in the
-    # repository, or Subversion will get confused.
-    if s_item.has_keywords:
-      pipe_cmd, pipe = get_co_pipe(c_rev, [ '-kk' ])
-    else:
-      pipe_cmd, pipe = get_co_pipe(c_rev)
+    checksum = md5.new()
+    checksum.update(co)
 
     self.dumpfile.write('Node-path: %s\n'
                         'Node-kind: file\n'
                         'Node-action: %s\n'
                         '%s'  # no property header if no props
-                        'Text-content-length: '
+                        'Text-content-length: %d\n'
+                        'Text-content-md5: %s\n'
+                        'Content-length: %d\n'
+                        '\n'
                         % (self._utf8_path(c_rev.svn_path),
-                           action, props_header))
-
-    pos = self.dumpfile.tell()
-
-    self.dumpfile.write('0000000000000000\n'
-                        'Text-content-md5: 00000000000000000000000000000000\n'
-                        'Content-length: 0000000000000000\n'
-                        '\n')
-
+                           action, props_header,
+                           len(co), checksum.hexdigest(),
+                           len(co) + props_len))
     if prop_contents:
       self.dumpfile.write(prop_contents)
-
-    # Insert a filter to convert all EOLs to LFs if neccessary
-    if s_item.needs_eol_filter:
-      data_reader = LF_EOL_Filter(pipe.stdout)
-    else:
-      data_reader = pipe.stdout
-
-    # Insert the rev contents, calculating length and checksum as we go.
-    checksum = md5.new()
-    length = 0
-    while True:
-      buf = data_reader.read(PIPE_READ_SIZE)
-      if buf == '':
-        break
-      checksum.update(buf)
-      length = length + len(buf)
-      self.dumpfile.write(buf)
-
-    pipe.stdout.close()
-    error_output = pipe.stderr.read()
-    exit_status = pipe.wait()
-    if exit_status:
-      sys.exit("%s: The command '%s' failed with exit status: %s\n"
-               "and the following output:\n"
-               "%s" % (error_prefix, pipe_cmd, exit_status, error_output))
-
-    # Go back to patch up the length and checksum headers:
-    self.dumpfile.seek(pos, 0)
-    # We left 16 zeros for the text length; replace them with the real
-    # length, padded on the left with spaces:
-    self.dumpfile.write('%16d' % length)
-    # 16... + 1 newline + len('Text-content-md5: ') == 35
-    self.dumpfile.seek(pos + 35, 0)
-    self.dumpfile.write(checksum.hexdigest())
-    # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
-    self.dumpfile.seek(pos + 84, 0)
-    # The content length is the length of property data, text data,
-    # and any metadata around/inside around them.
-    self.dumpfile.write('%16d' % (length + props_len))
-    # Jump back to the end of the stream
-    self.dumpfile.seek(0, 2)
-
+    self.dumpfile.write(co)
     # This record is done (write two newlines -- one to terminate
     # contents that weren't themselves newline-termination, one to
     # provide a blank line for readability.
@@ -4208,7 +4347,7 @@
                         warning_prefix)
 
         msg = "RESYNC: '%s' (%s): old time='%s' delta=%ds" \
-              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
+              % (c_rev.fname, c_rev.rev, time.ctime(c_rev.timestamp),
                  record[2] - c_rev.timestamp)
         Log().write(LOG_VERBOSE, msg)
 
@@ -4322,6 +4461,9 @@
   Log().write(LOG_QUIET, "Done.")
 
 def pass8():
+  checkout = CVSCheckout()
+  checkout.init()
+
   svncounter = 2 # Repository initialization is 1.
   repos = SVNRepositoryMirror()
   persistence_manager = PersistenceManager(DB_OPEN_READ)
@@ -4346,6 +4488,8 @@
 
   repos.finish()
 
+  checkout.done()
+
 _passes = [
   pass1,
   pass2,
@@ -4389,7 +4533,6 @@
     self.no_default_eol = 0
     self.eol_from_mime_type = 0
     self.keywords_off = 0
-    self.use_cvs = None
     self.svnadmin = "svnadmin"
     self.username = None
     self.print_help = 0
@@ -4492,8 +4635,6 @@
   print '  --profile            profile with \'hotshot\' (into file cvs2svn.hotshot)'
   print '  --dry-run            do not create a repository or a dumpfile;'
   print '                       just print what would happen.'
-  print '  --use-cvs            use CVS instead of RCS \'co\' to extract data'
-  print '                       (only use this if having problems with RCS)'
   print '  --svnadmin=PATH      path to the svnadmin program'
   print '  --trunk-only         convert only trunk commits, not tags nor branches'
   print '  --trunk=PATH         path for trunk (default: %s)'    \
@@ -4538,7 +4679,7 @@
                                  "username=", "existing-svnrepos",
                                  "branches=", "tags=", "encoding=",
                                  "force-branch=", "force-tag=", "exclude=",
-                                 "use-cvs", "mime-types=",
+                                 "mime-types=",
                                  "eol-from-mime-type", "no-default-eol",
                                  "trunk-only", "no-prune", "dry-run",
                                  "dump-only", "dumpfile=", "tmpdir=",
@@ -4588,8 +4729,6 @@
       ctx.dumpfile = value
     elif opt == '--tmpdir':
       ctx.tmpdir = value
-    elif opt == '--use-cvs':
-      ctx.use_cvs = 1
     elif opt == '--svnadmin':
       ctx.svnadmin = value
     elif opt == '--trunk-only':
@@ -4673,30 +4812,6 @@
                      "existing directory.\n" % ctx.cvsroot)
     sys.exit(1)
 
-  if ctx.use_cvs:
-    # Ascend above the specified root if necessary, to find the cvs_repository
-    # (a directory containing a CVSROOT directory) and the cvs_module (the
-    # path of the conversion root within the cvs repository)
-    # NB: cvs_module must be seperated by '/' *not* by os.sep .
-    ctx.cvs_repository = os.path.abspath(ctx.cvsroot)
-    prev_cvs_repository = None
-    ctx.cvs_module = ""
-    while prev_cvs_repository != ctx.cvs_repository:
-      if os.path.isdir(os.path.join(ctx.cvs_repository, 'CVSROOT')):
-        break
-      prev_cvs_repository = ctx.cvs_repository
-      ctx.cvs_repository, module_component = os.path.split(ctx.cvs_repository)
-      ctx.cvs_module = module_component + "/" + ctx.cvs_module
-    else:
-      # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
-      sys.stderr.write(error_prefix +
-                       ": the path '%s' is not a CVS repository, nor a path " \
-                       "within a CVS repository.  A CVS repository contains " \
-                       "a CVSROOT directory within its root directory.\n" \
-                       % ctx.cvsroot)
-      sys.exit(1)
-    os.environ['CVSROOT'] = ctx.cvs_repository
-
   if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
     sys.stderr.write(error_prefix +
                      ": must pass one of '-s' or '--dump-only'.\n")
@@ -4772,28 +4887,6 @@
                      % ctx.tmpdir)
     sys.exit(1)
 
-  if ctx.use_cvs:
-    def cvs_ok():
-      pipe = SimplePopen([ 'cvs' ] + Ctx().cvs_global_arguments + \
-                         [ '--version' ], True)
-      pipe.stdin.close()
-      pipe.stdout.read()
-      errmsg = pipe.stderr.read()
-      status = pipe.wait()
-      ok = len(errmsg) == 0 and status == 0
-      return (ok, status, errmsg)
-
-    ctx.cvs_global_arguments = [ "-q", "-R" ]
-    ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
-    if not ok:
-      ctx.cvs_global_arguments = [ "-q" ]
-      ok, cvs_exitstatus, cvs_errmsg = cvs_ok()
-
-    if not ok:
-      sys.stderr.write(error_prefix +
-                       ": error executing CVS: status %s, error output:\n" \
-                       % (cvs_exitstatus) + cvs_errmsg)
-  
   # But do lock the tmpdir, to avoid process clash.
   try:
     os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))