diff --git a/biblib/bib.py b/biblib/bib.py index 495ab87..21e19b4 100644 --- a/biblib/bib.py +++ b/biblib/bib.py @@ -206,13 +206,35 @@ def _scan_command_or_entry(self): if not self._try_tok('@'): return None - # Scan command or entry type + # Scan command or entry type. + # ID_RE includes '@', so "@comment@string" is scanned as one token + # (typ == 'comment@string'). We detect the "@comment@..." + # pattern by checking if the identifier starts with 'comment@' — this means + # the second '@' was consumed as part of the identifier, indicating + # @comment was NOT followed by a valid delimiter. In this case, + # @comment@string is valid entry type "string" per the BibTeX spec. + ident_start = self.__off typ = self._scan_identifier().lower() + # Handle "@comment@..." pattern. ID_RE matches '@', so + # "@comment@string" is consumed as one identifier token. + # We detect this by checking if 'comment@' appears at the start. + if typ.startswith('comment@') and typ != 'comment': + # "@comment@string" was scanned as identifier "comment@string". + # Per BibTeX spec, @comment@string is entry type "string". + # Rewind to the second '@'; the recursive call will consume + # it and scan the real entry type. Return its result so the + # outer call does not fall through to entry parsing. + self.__off = ident_start + typ.index('@', 1) + self.__in_comment_rewind = True + try: + result = self._scan_command_or_entry() + finally: + self.__in_comment_rewind = False + return result + if typ == 'comment': - # Believe it or not, BibTeX doesn't do anything with what - # comes after an @comment, treating it like any other - # inter-entry noise. + # Valid @comment command (followed by whitespace, {, or ,). 
return None left = self._tok('[{(]', 'expected { or ( after entry type') @@ -224,7 +246,7 @@ def _scan_command_or_entry(self): self._tok(right_re, 'expected '+right) return None - if typ == 'string': + if typ == 'string' and not getattr(self, '_Parser__in_comment_rewind', False): name = self._scan_identifier().lower() if name in self.__macros: self._warn('macro `{}\' redefined'.format(name)) diff --git a/biblib/test.py b/biblib/test.py index 714d89a..531c3e5 100644 --- a/biblib/test.py +++ b/biblib/test.py @@ -66,6 +66,23 @@ def test_comment(self): '@comment{abc@misc{x}', [ent('misc', 'x', od())]) + def test_comment_entry_types(self): + # @comment@book is entry type "book", not a comment command. + # Per BibTeX spec, @comment must be followed by a delimiter + # (space, tab, {, or ,) to be recognized as a comment; + # otherwise the text after @ is treated as the entry type. + self.__test_parse( + '@comment@book{test, title = {Test}}', + [ent('book', 'test', od('title', 'Test'))]) + # @comment@string is entry type "string" + self.__test_parse( + '@comment@string{foo, title = {Bar}}', + [ent('string', 'foo', od('title', 'Bar'))]) + # @comment followed by space is a valid comment + bib_with_comment = '@comment {text}\n@misc{x, title = {X}}' + self.__test_parse(bib_with_comment, + [ent('misc', 'x', od('title', 'X'))]) + class EntryTest(unittest.TestCase): def test_to_bib(self): entry = Entry([('author', 'An Author'),