Shared conversation
GPT-4 ·
Please alter the second check to find any ". username:" pattern in the accumulated text. If one is found, split the text there: send everything up to and including the period before "username:", and keep "username:" and everything after it in the accumulator.
When checking for the break point, make sure to use the same text cleaning method for consistency
# Decide whether the text accumulated so far should be handed to send_group,
# and what (if anything) stays behind in the accumulator for the next group.
# Split once up front; the speaker-label check looks at the last two words.
accumulated_words = accumulated_text.split(" ")

if content.endswith("]\n") or (
    len(accumulated_text) >= characters_per_line
    and ("." in content or "?" in content or "!" in content or "\n" in content)
):
    # Closing bracket + newline, or a full line whose newest token carries
    # sentence-ending punctuation / a newline: send everything, keep nothing.
    remaining_text = ""
    remaining_text_tokens = 0
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
# check for a stop token like .,!?] and a following name without spaces and then a colon like . username:
elif (
    len(accumulated_words) > 3
    and accumulated_text.endswith(":")
    and accumulated_words[-2].endswith((".", "!", "?", "]"))
    and len(accumulated_words[-1]) > 1
):
    # The last space-separated word (the "username:" label) is carried over.
    # Note that the full accumulated text, label included, is what gets
    # passed to send_group here.
    remaining_text = accumulated_words[-1]
    remaining_text_tokens = len(remaining_text.split())
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
elif len(accumulated_text) >= (characters_per_line * 1.5) and (
    content.endswith((" ", ",")) or content.startswith(" ")
):
    # Line is well over length and the newest token sits on a word boundary.
    if content.startswith(" ") and len(content) > 1:
        # The token starts a new word: carry that word over and
        # remove the duplicated end of accumulated text that contains the content token
        remaining_text = content[1:]
        remaining_text_tokens = len(remaining_text.split())
        accumulated_text = accumulated_text[:-(len(content) - 1)]
    else:
        remaining_text = ""
        remaining_text_tokens = 0
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
GPT-4
Here's the modified code to split accumulated text at ". username:" and keep "username:" with the subsequent text while sending the preceding text. I use a regular expression to find ". username:" and split the text accordingly.
import re
# ...
# When checking for the break point, make sure to use the same text cleaning method for consistency

# Look for a stop character followed by a speaker label (". username:")
# BEFORE entering the chain. Testing only the word count in the elif and the
# regex inside it would swallow every text longer than three words, so the
# long-line fallback at the bottom could never run when there is no label.
# NOTE(review): `\s*` also accepts no space at all, so e.g. "v1.2:" matches;
# tighten to `\s+` if only ". username:" should trigger a split.
speaker_break = None
if len(accumulated_text.split(" ")) > 3:
    speaker_break = re.search(r"(\.|\!|\?|\])\s*\b\w+:", accumulated_text)

if (content.endswith("]\n")) or (
    len(accumulated_text) >= characters_per_line
    and ('.' in content or '?' in content or '!' in content or '\n' in content)
):
    # Closing bracket + newline, or a full line whose newest token carries
    # sentence-ending punctuation / a newline: send everything, keep nothing.
    remaining_text = ""
    remaining_text_tokens = 0
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
# Check for a pattern like ". username:"
elif speaker_break:
    # match.start() points at the stop character; cut right after it.
    split_index = speaker_break.start() + 1
    # Drop the whitespace between the stop character and the label so the
    # accumulator starts with "username:" rather than " username:".
    remaining_text = accumulated_text[split_index:].lstrip()
    remaining_text_tokens = len(remaining_text.split())
    # Only the text up to and including the stop character is sent.
    accumulated_text = accumulated_text[:split_index]
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
elif len(accumulated_text) >= (characters_per_line * 1.5) and (
    content.endswith(" ") or content.endswith(",") or content.startswith(" ")
):
    # Line is well over length and the newest token sits on a word boundary.
    remaining_text = ""
    remaining_text_tokens = 0
    if content.startswith(" ") and len(content) > 1:
        # The token starts a new word: carry that word over to the next group.
        remaining_text = content[1:]
        remaining_text_tokens = len(remaining_text.split())
        # remove the duplicated end of accumulated text that contains the content token
        accumulated_text = accumulated_text[:-(len(content) - 1)]
    header_message = send_group(accumulated_text, zmq_sender, header_message.copy(), sentence_count)
    current_tokens = 0
    header_message["tokens"] = remaining_text_tokens
    header_message["text"] = remaining_text
    accumulated_text = remaining_text
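The regular expression (\.|\!|\?|\])\s*\b\w+: matches a period, exclamation mark, question mark or closing bracket, followed by zero or more whitespace characters, a word (the username), and a colon. The word boundary (\b) is always satisfied in this pattern, because the character before the username is either the punctuation mark or whitespace, so it does not restrict the match any further; and because \s* allows zero spaces, text such as "v1.2:" also matches. The match's start index (match.start()) points at the punctuation mark and is used to split the accumulated text: everything up to and including that mark is sent, and the remainder (the username onward) is kept in the accumulator.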
This conversation may have been edited by the sharer.