GPT-4

Shared conversation

GPT-4 ·

please alter the second check to find any ". username:" parts in the accumulated text, and if so split it there and keep the username: on the accumulator and past the username: to the end, and send the stuff up to the period . before username:.

When checking for the break point, make sure to use the same text cleaning method for consistency

                    # Decide whether the text accumulated so far should be flushed via send_group().
                    # Every branch passes a copy of header_message to send_group(), stores its return
                    # value back into header_message, resets current_tokens to 0, and then seeds
                    # header_message and accumulated_text with whatever text is carried over.
                    # NOTE(review): presumably send_group() transmits the text through zmq_sender — confirm.

                    # Branch 1: the newest content ends with "]\n", or the line is long enough
                    # (>= characters_per_line) and the newest content contains '.', '?', '!' or a newline.
                    # Nothing is carried over.
                    if (content.endswith("]\n")) or (len(accumulated_text) >= characters_per_line and ('.' in content or '?' in content or '!' in content or '\n' in content)):
                        remaining_text = ""
                        remaining_text_tokens = 0

                        header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
                        current_tokens = 0
                        header_message["tokens"] = remaining_text_tokens
                        header_message["text"] = remaining_text
                        accumulated_text = remaining_text
                    # check for a stop token like .,!?] and a following name without spaces and then a colon like . username:
                    # Requires more than three space-separated words, a trailing ":", a second-to-last
                    # word ending in '.', '!', '?' or ']', and a last word longer than one character.
                    elif (len(accumulated_text.split(" ")) > 3) and accumulated_text.endswith(":") and (accumulated_text.split(" ")[-2].endswith(".") or accumulated_text.split(" ")[-2].endswith("!") or accumulated_text.split(" ")[-2].endswith("?") or accumulated_text.split(" ")[-2].endswith("]")) and len(accumulated_text.split(" ")[-1]) > 1:
                        remaining_text = ""
                        remaining_text_tokens = 0
                        # carry the last space-separated word (the "username:" part) into the next group
                        remaining_text = accumulated_text.split(" ")[-1]
                        remaining_text_tokens = len(remaining_text.split())
                        # NOTE(review): accumulated_text is sent unmodified here, so the carried-over
                        # last word is also part of the text that gets sent.
                        header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
                        current_tokens = 0
                        header_message["tokens"] = remaining_text_tokens
                        header_message["text"] = remaining_text
                        accumulated_text = remaining_text
                    # Branch 3: the text has reached 1.5x characters_per_line and the newest content
                    # ends with a space or comma, or starts with a space.
                    elif len(accumulated_text) >= (characters_per_line * 1.5) and (content.endswith(" ") or content.endswith(",") or content.startswith(" ")):
                        remaining_text = ""
                        remaining_text_tokens = 0
                        # content that starts with a space is moved to the next group, minus that space
                        if content.startswith(" ") and len(content) > 1:
                            remaining_text = content[1:]
                            remaining_text_tokens = len(remaining_text.split())
                            # remove the duplicated end of accumulated text that contains the content token
                            accumulated_text = accumulated_text[:-(len(content)-1)]
                        header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
                        current_tokens = 0
                        header_message["tokens"] = remaining_text_tokens
                        header_message["text"] = remaining_text
                        accumulated_text = remaining_text

Here's the modified code to split accumulated text at ". username:" and keep "username:" with the subsequent text while sending the preceding text. I use a regular expression to find ". username:" and split the text accordingly.

python
import re

# ...

# Decide whether the accumulated text should be flushed through send_group().
#
# Three break points are checked, in priority order:
#   1. a sentence end in the newest content once the line is long enough,
#   2. a ". username:" pattern anywhere in the accumulated text,
#   3. an overflow flush at a space/comma once the text reaches 1.5x the line length.
#
# The ". username:" search is evaluated *before* the chain and used as the
# branch condition itself. If it were nested inside an `elif` that only tests
# the word count, any text with more than three words and no match would enter
# that branch, do nothing, and keep the overflow branch from ever running.
speaker_match = None
if len(accumulated_text.split(" ")) > 3:
    speaker_match = re.search(r"(\.|\!|\?|\])\s*\b\w+:", accumulated_text)

# When checking for the break point, make sure to use the same text cleaning method for consistency
if (content.endswith("]\n")) or (len(accumulated_text) >= characters_per_line and ('.' in content or '?' in content or '!' in content or '\n' in content)):
    # Sentence end: send everything, carry nothing over.
    remaining_text = ""
    remaining_text_tokens = 0

    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text

# Check for a pattern like ". username:"
elif speaker_match:
    # match.start() is the index of the punctuation mark; keep it with the
    # text that is sent, and carry "username:" plus everything after it.
    split_index = speaker_match.start() + 1
    # Drop the whitespace between the punctuation and the username so the
    # next group starts directly with "username:".
    remaining_text = accumulated_text[split_index:].lstrip()
    remaining_text_tokens = len(remaining_text.split())
    accumulated_text = accumulated_text[:split_index]

    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text

elif len(accumulated_text) >= (characters_per_line * 1.5) and (content.endswith(" ") or content.endswith(",") or content.startswith(" ")):
    # Overflow: break at the nearest space/comma even without a sentence end.
    remaining_text = ""
    remaining_text_tokens = 0
    if content.startswith(" ") and len(content) > 1:
        # The newest content begins a new word: move it (minus the leading
        # space) to the next group.
        remaining_text = content[1:]
        remaining_text_tokens = len(remaining_text.split())
        # remove the duplicated end of accumulated text that contains the content token
        accumulated_text = accumulated_text[:-(len(content)-1)]
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text

The regular expression (\.|\!|\?|\])\s*\b\w+: matches a period, exclamation mark, question mark, or closing bracket, followed by zero or more whitespace characters, a word (the username), and a colon. The word boundary (\b) marks the position where the username begins. Because re.search returns the first such match, match.start() is the index of the punctuation mark; the accumulated text is split immediately after it, so the punctuation stays with the text that is sent and the username stays with the text that is kept.

This conversation may have been edited by the sharer.

GPT-4Talk to GPT-4 on Poe