3232
// Expands to the element at position `n` of the `data` member of `i`.
// NOTE(review): neither `n` nor the whole expansion is parenthesized, so
// callers should pass simple expressions only -- confirm at call sites.
3333#define peek_at (i , n ) (i)->data[n]
3434
35- static bool S_last_line_blank (const cmark_node * node ) {
36- return (node -> flags & CMARK_NODE__LAST_LINE_BLANK ) != 0 ;
37- }
38-
// Returns the `type` field of `node`, cast to cmark_node_type.
3935static CMARK_INLINE cmark_node_type S_type (const cmark_node * node ) {
4036 return (cmark_node_type )node -> type ;
4137}
4238
43- static void S_set_last_line_blank (cmark_node * node , bool is_blank ) {
44- if (is_blank )
45- node -> flags |= CMARK_NODE__LAST_LINE_BLANK ;
46- else
47- node -> flags &= ~CMARK_NODE__LAST_LINE_BLANK ;
48- }
49-
// Returns true if `c` is a line feed ('\n') or a carriage return ('\r').
5039static CMARK_INLINE bool S_is_line_end_char (char c ) {
5140 return (c == '\n' || c == '\r' );
5241}
@@ -124,8 +113,6 @@ void cmark_parser_free(cmark_parser *parser) {
124113 mem -> free (parser );
125114}
126115
127- static cmark_node * finalize (cmark_parser * parser , cmark_node * b );
128-
129116// Returns true if line has only space characters, else false.
130117static bool is_blank_raw (const unsigned char * ptr , const bufsize_t size ,
131118 bufsize_t offset ) {
@@ -209,26 +196,25 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
209196 return ;
210197 }
211198
199+ // Scan forward until line end to keep trailing spaces of the last line.
212200 for (; i < ln -> size ; ++ i ) {
213201 c = ln -> ptr [i ];
214202
215203 if (!S_is_line_end_char (c ))
216204 continue ;
217205
218- cmark_strbuf_truncate (ln , i );
206+ if (c == '\r' && i + 1 < ln -> size && ln -> ptr [i + 1 ] == '\n' ) {
207+ i ++ ;
208+ }
209+
210+ cmark_strbuf_truncate (ln , i + 1 );
219211 break ;
220212 }
221213}
222214
223- // Check to see if a node ends with a blank line, descending
224- // if needed into lists and sublists.
225- static bool S_ends_with_blank_line (cmark_node * node ) {
226- if ((S_type (node ) == CMARK_NODE_LIST ||
227- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
228- return (S_ends_with_blank_line (node -> last_child ));
229- } else {
230- return (S_last_line_blank (node ));
231- }
215+ // Check to see if a node ends with a blank line.
// Returns true only when `node` has a next sibling and that sibling's
// start_line is not the line directly after `node`'s end_line. A node with
// no next sibling always yields false.
216+ static CMARK_INLINE bool S_ends_with_blank_line (cmark_node * node ) {
217+ return node -> next && node -> end_line != node -> next -> start_line - 1 ;
232218}
233219
234220// returns true if content remains after link defs are resolved.
@@ -331,7 +317,15 @@ static void resolve_all_reference_link_definitions(cmark_parser *parser) {
331317 }
332318}
333319
334- static cmark_node * finalize (cmark_parser * parser , cmark_node * b ) {
320+ // `closed_explicitly` states that the node is closed by explicit markers, or
321+ // the node cannot span more than one line:
322+ //
323+ // - Close tag of HTML blocks
324+ // - Closing code fence
325+ // - ATX headings
326+ // - Thematic breaks
327+ static cmark_node * finalize (cmark_parser * parser , cmark_node * b ,
328+ bool closed_explicitly ) {
335329 bufsize_t pos ;
336330 cmark_node * item ;
337331 cmark_node * subitem ;
@@ -342,22 +336,22 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
342336 CMARK_NODE__OPEN ); // shouldn't call finalize on closed blocks
343337 b -> flags &= ~CMARK_NODE__OPEN ;
344338
345- if (parser -> curline . size == 0 ) {
346- // end of input - line number has not been incremented
347- b -> end_line = parser -> line_number ;
348- b -> end_column = parser -> last_line_length ;
349- } else if ( S_type ( b ) == CMARK_NODE_DOCUMENT ||
350- ( S_type ( b ) == CMARK_NODE_CODE_BLOCK && b -> as . code . fenced ) ||
351- ( S_type ( b ) == CMARK_NODE_HEADING && b -> as . heading . setext )) {
352- b -> end_line = parser -> line_number ;
353- b -> end_column = parser -> curline .size ;
354- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\n' )
355- b -> end_column -= 1 ;
356- if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
357- b -> end_column -= 1 ;
358- } else {
359- b -> end_line = parser -> line_number - 1 ;
360- b -> end_column = parser -> last_line_length ;
339+ if (S_type ( b ) != CMARK_NODE_CODE_BLOCK || b -> as . code . fenced ) {
340+ if ( parser -> curline . size == 0 ) {
341+ // end of input - line number has not been incremented
342+ b -> end_line = parser -> line_number ;
343+ b -> end_column = parser -> last_line_length ;
344+ } else if ( closed_explicitly ) {
345+ b -> end_line = parser -> line_number ;
346+ b -> end_column = parser -> curline . size ;
347+ if ( b -> end_column && parser -> curline .ptr [ b -> end_column - 1 ] == '\n' )
348+ b -> end_column -= 1 ;
349+ if ( b -> end_column && parser -> curline . ptr [ b -> end_column - 1 ] == '\r' )
350+ b -> end_column -= 1 ;
351+ } else {
352+ b -> end_line = parser -> line_number - 1 ;
353+ b -> end_column = parser -> last_line_length ;
354+ }
361355 }
362356
363357 cmark_strbuf * node_content = & parser -> content ;
@@ -371,7 +365,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
371365 case CMARK_NODE_CODE_BLOCK :
372366 if (!b -> as .code .fenced ) { // indented code
373367 remove_trailing_blank_lines (node_content );
374- cmark_strbuf_putc (node_content , '\n' );
375368 } else {
376369 // first line of contents becomes info
377370 for (pos = 0 ; pos < node_content -> size ; ++ pos ) {
@@ -412,16 +405,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
412405
413406 while (item ) {
414407 // check for non-final non-empty list item ending with blank line:
415- if (S_last_line_blank ( item ) && item -> next ) {
408+ if (item -> next && S_ends_with_blank_line ( item ) ) {
416409 b -> as .list .tight = false;
417410 break ;
418411 }
419412 // recurse into children of list item, to see if there are
420413 // spaces between them:
421414 subitem = item -> first_child ;
422415 while (subitem ) {
423- if ((item -> next || subitem -> next ) &&
424- S_ends_with_blank_line (subitem )) {
416+ if (subitem -> next && S_ends_with_blank_line (subitem )) {
425417 b -> as .list .tight = false;
426418 break ;
427419 }
@@ -432,9 +424,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
432424 }
433425 item = item -> next ;
434426 }
427+ b -> end_line = b -> last_child -> end_line ;
428+ b -> end_column = b -> last_child -> end_column ;
435429
436430 break ;
437431
432+ case CMARK_NODE_ITEM :
433+ if (b -> last_child ) {
434+ b -> end_line = b -> last_child -> end_line ;
435+ b -> end_column = b -> last_child -> end_column ;
436+ }
437+ // If the item is empty, it is closed when the next line is processed and
438+ // the end position is set by the normal path. Note that if the first line
439+ // and second line of an item are blank, it is closed.
440+ break ;
441+
438442 case CMARK_NODE_DOCUMENT :
439443 resolve_all_reference_link_definitions (parser );
440444 break ;
@@ -454,7 +458,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
454458 // if 'parent' isn't the kind of node that can accept this child,
455459 // then back up til we hit a node that can.
456460 while (!can_contain (S_type (parent ), block_type )) {
457- parent = finalize (parser , parent );
461+ parent = finalize (parser , parent , false );
458462 }
459463
460464 cmark_node * child =
@@ -594,10 +598,10 @@ static int lists_match(cmark_list *list_data, cmark_list *item_data) {
594598
595599static cmark_node * finalize_document (cmark_parser * parser ) {
596600 while (parser -> current != parser -> root ) {
597- parser -> current = finalize (parser , parser -> current );
601+ parser -> current = finalize (parser , parser -> current , false );
598602 }
599603
600- finalize (parser , parser -> root );
604+ finalize (parser , parser -> root , false );
601605
602606 // Limit total size of extra content created from reference links to
603607 // document size to avoid superlinear growth. Always allow 100KB.
@@ -917,7 +921,7 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
917921 // the end of a line, we can stop processing it:
918922 * should_continue = false;
919923 S_advance_offset (parser , input , matched , false);
920- parser -> current = finalize (parser , container );
924+ parser -> current = finalize (parser , container , true );
921925 } else {
922926 // skip opt. spaces of fence parser->offset
923927 int i = container -> as .code .fence_offset ;
@@ -1121,6 +1125,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11211125 // it's only now that we know the line is not part of a setext heading:
11221126 * container = add_child (parser , * container , CMARK_NODE_THEMATIC_BREAK ,
11231127 parser -> first_nonspace + 1 );
1128+ * container = finalize (parser , * container , true);
11241129 S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
11251130 } else if ((!indented || cont_type == CMARK_NODE_LIST ) &&
11261131 parser -> indent < 4 &&
@@ -1207,35 +1212,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12071212static void add_text_to_container (cmark_parser * parser , cmark_node * container ,
12081213 cmark_node * last_matched_container ,
12091214 cmark_chunk * input ) {
1210- cmark_node * tmp ;
12111215 // what remains at parser->offset is a text line. add the text to the
12121216 // appropriate container.
12131217
12141218 S_find_first_nonspace (parser , input );
12151219
1216- if (parser -> blank && container -> last_child )
1217- S_set_last_line_blank (container -> last_child , true);
1218-
1219- // block quote lines are never blank as they start with >
1220- // and we don't count blanks in fenced code for purposes of tight/loose
1221- // lists or breaking out of lists. we also don't set last_line_blank
1222- // on an empty list item.
1223- const cmark_node_type ctype = S_type (container );
1224- const bool last_line_blank =
1225- (parser -> blank && ctype != CMARK_NODE_BLOCK_QUOTE &&
1226- ctype != CMARK_NODE_HEADING && ctype != CMARK_NODE_THEMATIC_BREAK &&
1227- !(ctype == CMARK_NODE_CODE_BLOCK && container -> as .code .fenced ) &&
1228- !(ctype == CMARK_NODE_ITEM && container -> first_child == NULL &&
1229- container -> start_line == parser -> line_number ));
1230-
1231- S_set_last_line_blank (container , last_line_blank );
1232-
1233- tmp = container ;
1234- while (tmp -> parent ) {
1235- S_set_last_line_blank (tmp -> parent , false);
1236- tmp = tmp -> parent ;
1237- }
1238-
12391220 // If the last line processed belonged to a paragraph node,
12401221 // and we didn't match all of the line prefixes for the open containers,
12411222 // and we didn't start any new containers,
@@ -1249,7 +1230,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12491230 } else { // not a lazy continuation
12501231 // Finalize any blocks that were not matched and set cur to container:
12511232 while (parser -> current != last_matched_container ) {
1252- parser -> current = finalize (parser , parser -> current );
1233+ parser -> current = finalize (parser , parser -> current , false );
12531234 assert (parser -> current != NULL );
12541235 }
12551236
@@ -1291,7 +1272,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
12911272 }
12921273
12931274 if (matches_end_condition ) {
1294- container = finalize (parser , container );
1275+ container = finalize (parser , container , true );
12951276 assert (parser -> current != NULL );
12961277 }
12971278 } else if (parser -> blank ) {
@@ -1324,6 +1305,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13241305 bool all_matched = true;
13251306 cmark_node * container ;
13261307 cmark_chunk input ;
1308+ bool need_set_end_position = false;
13271309
13281310 if (parser -> options & CMARK_OPT_VALIDATE_UTF8 )
13291311 cmark_utf8proc_check (& parser -> curline , buffer , bytes );
@@ -1361,6 +1343,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13611343
13621344 add_text_to_container (parser , container , last_matched_container , & input );
13631345
1346+ need_set_end_position = S_type (container ) == CMARK_NODE_CODE_BLOCK &&
1347+ !container -> as .code .fenced &&
1348+ !parser -> blank ;
1349+
13641350finished :
13651351 parser -> last_line_length = input .len ;
13661352 if (parser -> last_line_length &&
@@ -1370,6 +1356,11 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13701356 input .data [parser -> last_line_length - 1 ] == '\r' )
13711357 parser -> last_line_length -= 1 ;
13721358
1359+ if (need_set_end_position ) {
1360+ container -> end_line = parser -> line_number ;
1361+ container -> end_column = parser -> last_line_length ;
1362+ }
1363+
13731364 cmark_strbuf_clear (& parser -> curline );
13741365}
13751366
0 commit comments