[Quick-and-dirty patch to get tags out of the RCS files. What this does: * Gets the tags out of the RCS files. Seems to mostly work. Passes the first few tests in sanity.sh. Things this doesn't do/include: * Maintain compatibility in repository storage with other versions of CVS, past or future. Don't say you weren't warned. It is very likely that if similar functionality becomes part of CVS, it will not do things identically. * Pass the rest of sanity.sh. The problem is the order in which it puts the tags. RCS_settag should be adding the new tag at the beginning, not at the end. This makes the rdiff-7 test fail (and I would imagine many other tests too). I expect that user habits and/or scripts depend on the order, so it is probably worth preserving. * Any change to locking. The ability to lock tags separately from other data is probably at least as important as the ability to store them separately. * On a more mundane note, the code in import.c is buggy in that it doesn't take out writelocks, which could cause the fileattr file to be corrupted. * Performance measurements. This code is almost surely rough and in need of a more careful look at performance to see whether it is getting the kinds of gains that people have looked for from separating tags. * Would need to be an option in CVSROOT/config, of course. For example, SeparateTags=yes or SeparateTags=no. * The usual cleanliness issues: documentation (cvs.texinfo, NEWS), ChangeLog entry, read the code more carefully (especially in terms of looking for things like memory leaks). * Closer look at compatibility (for example, converting repositories from one format to the other, what happens if you run the wrong CVS on a repository, &c). Also importing and exporting RCS files. * Presumably this wants to be combined with Ian's patch for _head, _branch, and _attic. What else? Keyword expansion mode? * The patch uses "foo,v" rather than "foo" as the filename for the _tags attribute. 
Mostly harmless, but ugly and so should be fixed. * As for whether this is the right general idea, this depends on the performance measurements and such to some extent but I am inclined to favor instead (1) the RCS file padding kludge in the context of preserving compatibility, and (2) in the context of a new incompatible mechanism, introducing some kind of per-directory or per-directory-tree change ID, so you don't need to store revision numbers of individual files at all, rather than just moving around where you store it. I think the patch is relative to a development version of CVS as of December 1998 (CVS 1.10.4.1). I'm told it doesn't apply as-is to CVS 1.10, but it is more of a proof of concept than anything else.... -kingdon] Index: import.c =================================================================== RCS file: /home2/cvsroot/ccvs/src/import.c,v retrieving revision 1.109 diff -u -r1.109 import.c --- import.c 1998/09/30 17:34:26 1.109 +++ import.c 1998/12/07 20:01:38 @@ -19,6 +19,7 @@ #include "cvs.h" #include "savecwd.h" #include +#include "fileattr.h" static char *get_comment PROTO((char *user)); static int add_rev PROTO((char *message, RCSNode *rcs, char *vfile, @@ -403,6 +404,7 @@ /* first, load up any per-directory ignore lists */ ign_add_file (CVSDOTIGNORE, 1); wrap_add_file (CVSDOTWRAPPER, 1); + fileattr_startdir (repository); if ((dirp = CVS_OPENDIR (".")) == NULL) { @@ -495,6 +497,8 @@ dellist(&dirlist); } + fileattr_write (); + fileattr_free (); return (err); } @@ -1105,6 +1109,8 @@ goto write_error; } +#if 0 + /* SeparateTags=no. */ for (i = targc - 1; i >= 0; i--) { /* RCS writes the symbols backwards */ @@ -1118,6 +1124,7 @@ if (fprintf (fprcs, "%s:%s", vtag, add_vbranch) < 0) goto write_error; } +#endif /* We set the tags later for SeparateTags=yes. 
*/ if (fprintf (fprcs, ";\012") < 0) goto write_error; @@ -1384,6 +1391,58 @@ error (0, errno, "cannot remove %s", tocvsPath); if (free_opt != NULL) free (free_opt); + +#if 1 + /* SeparateTags=yes. FIXME: the bit about RCS_parse is totally + unnecessary. Maybe the clean solution is to just switch over + to having import.c use update_rcs_file (would want to see + whether that would be a slowdown these days; certainly the + issues have changed but I don't know how much). */ + + if (add_vbranch != NULL) + { + char *names = xstrdup (rcs); + char *basename = last_component (names); + RCSNode *rcsnode; + char *newtag; + char *p; + + assert (basename != names); + basename[-1] = '\0'; + p = basename + strlen (basename); + assert (p[-1] == 'v' && p[-2] == ','); + p[-2] = '\0'; + + rcsnode = RCS_parse (basename, names); + if (rcsnode == NULL) + error (1, 0, "cannot parse %s", rcs); + + if (targc > 0) + { + assert (add_vbranch != NULL); + newtag = xmalloc (strlen (add_vbranch) + 10); + sprintf (newtag, "%s.1", add_vbranch); + } + + for (i = targc - 1; i >= 0; i--) + { + /* RCS writes the symbols backwards */ + if (RCS_settag (rcsnode, targv[i], newtag) < 0) + error (1, errno, "cannot tag"); + } + + if (add_vbranch != NULL) + { + if (RCS_settag (rcsnode, vtag, add_vbranch) < 0) + error (1, errno, "cannot tag"); + } + + free (newtag); + free (names); + freercsnode (&rcsnode); + } +#endif + return (err); write_error: Index: rcs.c =================================================================== RCS file: /home2/cvsroot/ccvs/src/rcs.c,v retrieving revision 1.208 diff -u -r1.208 rcs.c --- rcs.c 1998/09/25 01:25:18 1.208 +++ rcs.c 1998/12/07 20:16:07 @@ -12,6 +12,7 @@ #include "cvs.h" #include "edit.h" #include "hardlink.h" +#include "fileattr.h" int preserve_perms = 0; @@ -75,7 +76,9 @@ struct rcsbuffer *)); static int checkmagic_proc PROTO((Node *p, void *closure)); static void do_branches PROTO((List * list, char *val)); +#if 0 /* SeparateTags */ static void do_symbols 
PROTO((List * list, char *val)); +#endif static void do_locks PROTO((List * list, char *val)); static void free_rcsnode_contents PROTO((RCSNode *)); static void free_rcsvers_contents PROTO((RCSVers *)); @@ -108,7 +111,9 @@ static void RCS_putdesc PROTO ((RCSNode *, FILE *)); static void putdelta PROTO ((RCSVers *, FILE *)); static int putrcsfield_proc PROTO ((Node *, void *)); +#if 0 /* SeparateTags */ static int putsymbol_proc PROTO ((Node *, void *)); +#endif static void RCS_copydeltas PROTO ((RCSNode *, FILE *, struct rcsbuffer *, FILE *, Deltatext *, char *)); static int count_delta_actions PROTO ((Node *, void *)); @@ -2277,6 +2282,8 @@ } +#if 0 +/* Only for SeparateTags=no. */ /* * process the symbols list of the rcs file */ @@ -2316,6 +2323,7 @@ (void) addnode (list, p); } } +#endif /* Only for SeparateTags=no. */ /* * process the locks list of the rcs file @@ -2557,9 +2565,12 @@ /* make sure we have something to look at... */ assert (rcs != NULL); +#if 0 + /* Not necessary for SeparateTags=yes, certainly. */ /* XXX this is probably not necessary, --jtc */ if (rcs->flags & PARTIAL) RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); +#endif /* If tag is "HEAD", special case to get head RCS revision */ if (tag && (STREQ (tag, TAG_HEAD) || *tag == '\0')) @@ -2637,6 +2648,12 @@ while (tag[strlen (tag) - 1] == '.') tag[strlen (tag) - 1] = '\0'; + /* Needed for the access to rcs->versions below. FIXME? Should + there be a function which does the findnode (rcs->versions) and also + takes care of PARTIAL? 
*/ + if (rcs->flags & PARTIAL) + RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); + if ((numdots (tag) & 1) == 0) { char *branch; @@ -3409,6 +3426,7 @@ { assert(rcs != NULL); +#if 0 if (rcs->flags & PARTIAL) RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); @@ -3418,6 +3436,71 @@ free(rcs->symbols_data); rcs->symbols_data = NULL; } +#else + if (rcs->symbols == NULL) + { + char *cp; + + rcs->symbols = getlist (); + /* See comment in translate_symtag about a fileattr_modify analogue + to get values. But here we want a walklist style interface, or + some such. */ + cp = fileattr_get (last_component (rcs->path), "_tags"); + if (cp == NULL) + return rcs->symbols; + + for (;;) + { + char *tag; + size_t taglen; + char *rev; + size_t revlen; + Node *p; + + /* if we got to the end, we are done */ + if (*cp == '\0') + break; + + /* split it up into tag and rev */ + tag = cp; + cp = strchr (cp, ':'); + assert (cp != NULL); + taglen = cp - tag; + rev = cp + 1; + + /* The value we are looking for is terminated by ';' + or '\0' or ','. */ + cp = strchr (rev, ','); + if (cp == NULL) + { + cp = strchr (rev, ';'); + } + if (cp == NULL) + { + revlen = strlen (rev); + cp = rev + revlen; + } + else + { + revlen = cp - rev; + ++cp; + } + + /* make a new node and add it to the list */ + p = getnode (); + + p->key = xmalloc (taglen + 1); + strncpy (p->key, tag, taglen); + p->key[taglen] = '\0'; + + p->data = xmalloc (revlen + 1); + strncpy (p->data, rev, revlen); + p->data[revlen] = '\0'; + + (void) addnode (rcs->symbols, p); + } + } +#endif return rcs->symbols; } @@ -3431,6 +3514,13 @@ RCSNode *rcs; const char *tag; { + char *taglist; + char *p; + /* FIXME: make sure to nuke the other "len" variable in the + #if 0 code when doing the SeparateTags thing. */ + size_t len; +#if 0 + /* This is for SeparateTags=no. 
*/ if (rcs->flags & PARTIAL) RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); @@ -3482,7 +3572,57 @@ ++cp; } } +#else + /* SeparateTags=yes. Cool item #1: we don't need to call + RCS_reparsercsfile, which should speed up things like seeing + whether "cvs update" on a branch needs to do anything. */ + + taglist = fileattr_get (last_component (rcs->path), "_tags"); + len = strlen (tag); + + /* FIXME: There probably should be some analogue to + fileattr_modify for getting a value. Could also presumably be + used in fileattr_get, editors_fileproc, &c. Maybe the right + calling convention is that it passes back a char * and a length. */ + p = taglist; + while (p) + { + char *q; + char *r; + + r = strchr (p, ':'); + if (r != NULL + && r - p == len + && strncmp (tag, p, len) == 0) + { + /* Found it. */ + char *retval; + + /* The value we are looking for is terminated by ';' + or '\0' or ','. */ + q = strchr (p, ','); + if (q == NULL) + { + q = strchr (p, ';'); + if (q == NULL) + { + q = p + strlen (p); + } + } + + retval = xmalloc (q - (p + len + 1) + 1); + strncpy (retval, p + len + 1, q - (p + len + 1)); + retval[q - (p + len + 1)] = '\0'; + return retval; + } + p = strchr (p, ','); + if (p == NULL) + break; + ++p; + } +#endif + return NULL; } @@ -5811,11 +5951,17 @@ const char *tag; const char *rev; { + char *taglist; + char *newlist; + +#if 0 + /* This would only be needed for SeparateTags=no. */ List *symbols; Node *node; if (rcs->flags & PARTIAL) RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); +#endif /* FIXME: This check should be moved to RCS_check_tag. There is no reason for it to be here. */ @@ -5836,6 +5982,8 @@ if (rev == NULL) rev = rcs->branch ? rcs->branch : rcs->head; +#if 0 + /* This would only be needed for SeparateTags=no. */ /* At this point rcs->symbol_data may not have been parsed. Calling RCS_symbols will force it to be parsed into a list which we can easily manipulate. 
*/ @@ -5859,6 +6007,18 @@ (void) addnode_at_front (symbols, node); } +#else + /* SeparateTags=yes. */ + + taglist = fileattr_get0 (last_component (rcs->path), "_tags"); + newlist = fileattr_modify (taglist, tag, rev, ':', ','); + fileattr_set (last_component (rcs->path), "_tags", newlist); + if (taglist != NULL) + free (taglist); + if (newlist != NULL) + free (newlist); +#endif + return 0; } @@ -5871,6 +6031,10 @@ RCSNode *rcs; const char *tag; { + char *taglist; + char *newlist; +#if 0 + /* SeparateTags=no. */ List *symbols; Node *node; if (rcs->flags & PARTIAL) @@ -5885,6 +6049,16 @@ return 1; delnode (node); +#else + /* SeparateTags=yes. */ + taglist = fileattr_get0 (last_component (rcs->path), "_tags"); + newlist = fileattr_modify (taglist, tag, NULL, ':', ','); + fileattr_set (last_component (rcs->path), "_tags", newlist); + if (taglist != NULL) + free (taglist); + if (newlist != NULL) + free (newlist); +#endif return 0; } @@ -7897,6 +8071,8 @@ return d; } +#if 0 +/* Only for SeparateTags=no. */ /* RCS output functions, for writing RCS format files from RCSNode structures. @@ -7927,6 +8103,7 @@ fputs (symnode->data, fp); return 0; } +#endif /* Only for SeparateTags=no. */ static int putlock_proc PROTO ((Node *, void *)); @@ -8041,6 +8218,10 @@ fputs (RCSSYMBOLS, fp); /* If we haven't had to convert the symbols to a list yet, don't force a conversion now; just write out the string. */ +#if 0 + /* For SeparateTags=yes, don't write tags to the RCS file. + Probably want to think more about how tags in the RCS file + interact with the real tags. */ if (rcs->symbols == NULL && rcs->symbols_data != NULL) { fputs ("\n\t", fp); @@ -8048,6 +8229,7 @@ } else walklist (RCS_symbols (rcs), putsymbol_proc, (void *) fp); +#endif fputs (";\n", fp); fputs ("locks", fp); @@ -8525,6 +8707,13 @@ /* Make sure we're operating on an actual file and not a symlink. */ resolve_symlink (&(rcs->path)); + + /* The PARTIAL case happens if we have only written tags + (SeparateTags=yes). 
FIXME? Should there be some kind of + assert(), which throws up if the caller calls us when nothing + has changed? Might catch some performance bugs... */ + if (rcs->flags & PARTIAL) + RCS_reparsercsfile (rcs, (FILE **) NULL, (struct rcsbuffer *) NULL); fout = rcs_internal_lockfile (rcs->path);