Sentence Validation Stream Processor
Problem
Create a basic sentence checker that takes in a stream of characters and determines whether they form valid sentences. If a sentence is valid, the program should print it out.
We can consider a sentence valid if it conforms to the following rules:
- The sentence must start with a capital letter, followed by a lowercase letter or a space.
- All other characters must be lowercase letters, separators (
,,;,:) or terminal marks (.,?,!,‽). - There must be a single space between each word.
- The sentence must end with a terminal mark immediately following a word.
Examples
Example 1
Input: "Hello world!"
Output: "Hello world!"
Explanation: Valid sentence - starts with capital, has single space between words, ends with terminal mark.
Example 2
Input: "hello world!"
Output: (no output)
Explanation: Invalid - doesn't start with capital letter.
Example 3
Input: "Hello world!"
Output: (no output)
Explanation: Invalid - has double space between words.
Example 4
Input: "Hello, world; how are you?"
Output: "Hello, world; how are you?"
Explanation: Valid - contains separators and proper formatting.
Example 5
Input: "Hello world"
Output: (no output)
Explanation: Invalid - doesn't end with terminal mark.
Example 6
Input: "Hello World!"
Output: (no output)
Explanation: Invalid - 'World' starts with capital (should be lowercase).
Solution
Method 1 - State Machine Validation
Intuition
We can model this problem as a finite state machine where we track the current state of sentence parsing. The key insight is that we need to validate characters sequentially while maintaining context about what characters are expected next based on the current position in the sentence.
Approach
- Define states: START, FIRST_CHAR, WORD, SPACE, SEPARATOR, TERMINAL
- For each character, check if it's valid for the current state
- Transition to the next appropriate state
- Track if we've seen any invalid transitions
- A sentence is valid only if it ends in TERMINAL state with no invalid transitions
Key validation rules:
- First character must be uppercase letter
- Second character must be lowercase letter or space
- Words consist only of lowercase letters
- Single spaces separate words
- Separators can appear after words
- Terminal marks must follow words directly
Code
C++
class Solution {
public:
bool isValidSentence(string s) {
if (s.empty()) return false;
enum State { START, FIRST_CHAR, WORD, SPACE, SEPARATOR, TERMINAL };
State state = START;
for (int i = 0; i < s.length(); i++) {
char c = s[i];
switch (state) {
case START:
if (isupper(c)) {
state = FIRST_CHAR;
} else {
return false;
}
break;
case FIRST_CHAR:
if (islower(c)) {
state = WORD;
} else if (c == ' ') {
return false; // Space after single capital letter
} else if (isTerminal(c)) {
state = TERMINAL;
} else {
return false;
}
break;
case WORD:
if (islower(c)) {
// Stay in WORD
} else if (c == ' ') {
state = SPACE;
} else if (isSeparator(c)) {
state = SEPARATOR;
} else if (isTerminal(c)) {
state = TERMINAL;
} else {
return false;
}
break;
case SPACE:
if (islower(c)) {
state = WORD;
} else {
return false; // No double spaces or other chars after space
}
break;
case SEPARATOR:
if (c == ' ') {
state = SPACE;
} else if (islower(c)) {
state = WORD;
} else if (isTerminal(c)) {
state = TERMINAL;
} else {
return false;
}
break;
case TERMINAL:
return false; // Nothing should follow terminal
}
}
return state == TERMINAL;
}
private:
bool isSeparator(char c) {
return c == ',' || c == ';' || c == ':';
}
bool isTerminal(char c) {
return c == '.' || c == '?' || c == '!' || c == '‽';
}
};
Go
func isValidSentence(s string) bool {
if len(s) == 0 {
return false
}
const (
START = iota
FIRST_CHAR
WORD
SPACE
SEPARATOR
TERMINAL
)
state := START
for i, c := range s {
switch state {
case START:
if c >= 'A' && c <= 'Z' {
state = FIRST_CHAR
} else {
return false
}
case FIRST_CHAR:
if c >= 'a' && c <= 'z' {
state = WORD
} else if c == ' ' {
return false
} else if isTerminal(c) {
state = TERMINAL
} else {
return false
}
case WORD:
if c >= 'a' && c <= 'z' {
// Stay in WORD
} else if c == ' ' {
state = SPACE
} else if isSeparator(c) {
state = SEPARATOR
} else if isTerminal(c) {
state = TERMINAL
} else {
return false
}
case SPACE:
if c >= 'a' && c <= 'z' {
state = WORD
} else {
return false
}
case SEPARATOR:
if c == ' ' {
state = SPACE
} else if c >= 'a' && c <= 'z' {
state = WORD
} else if isTerminal(c) {
state = TERMINAL
} else {
return false
}
case TERMINAL:
return false
}
}
return state == TERMINAL
}
func isSeparator(c rune) bool {
return c == ',' || c == ';' || c == ':'
}
func isTerminal(c rune) bool {
return c == '.' || c == '?' || c == '!' || c == '‽'
}
Java
class Solution {
private enum State {
START, FIRST_CHAR, WORD, SPACE, SEPARATOR, TERMINAL
}
public boolean isValidSentence(String s) {
if (s.isEmpty()) return false;
State state = State.START;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
switch (state) {
case START:
if (Character.isUpperCase(c)) {
state = State.FIRST_CHAR;
} else {
return false;
}
break;
case FIRST_CHAR:
if (Character.isLowerCase(c)) {
state = State.WORD;
} else if (c == ' ') {
return false;
} else if (isTerminal(c)) {
state = State.TERMINAL;
} else {
return false;
}
break;
case WORD:
if (Character.isLowerCase(c)) {
// Stay in WORD
} else if (c == ' ') {
state = State.SPACE;
} else if (isSeparator(c)) {
state = State.SEPARATOR;
} else if (isTerminal(c)) {
state = State.TERMINAL;
} else {
return false;
}
break;
case SPACE:
if (Character.isLowerCase(c)) {
state = State.WORD;
} else {
return false;
}
break;
case SEPARATOR:
if (c == ' ') {
state = State.SPACE;
} else if (Character.isLowerCase(c)) {
state = State.WORD;
} else if (isTerminal(c)) {
state = State.TERMINAL;
} else {
return false;
}
break;
case TERMINAL:
return false;
}
}
return state == State.TERMINAL;
}
private boolean isSeparator(char c) {
return c == ',' || c == ';' || c == ':';
}
private boolean isTerminal(char c) {
return c == '.' || c == '?' || c == '!' || c == '‽';
}
}
Python
class Solution:
def is_valid_sentence(self, s: str) -> bool:
if not s:
return False
START, FIRST_CHAR, WORD, SPACE, SEPARATOR, TERMINAL = 0, 1, 2, 3, 4, 5
state = START
for c in s:
if state == START:
if c.isupper():
state = FIRST_CHAR
else:
return False
elif state == FIRST_CHAR:
if c.islower():
state = WORD
elif c == ' ':
return False
elif self._is_terminal(c):
state = TERMINAL
else:
return False
elif state == WORD:
if c.islower():
pass # Stay in WORD
elif c == ' ':
state = SPACE
elif self._is_separator(c):
state = SEPARATOR
elif self._is_terminal(c):
state = TERMINAL
else:
return False
elif state == SPACE:
if c.islower():
state = WORD
else:
return False
elif state == SEPARATOR:
if c == ' ':
state = SPACE
elif c.islower():
state = WORD
elif self._is_terminal(c):
state = TERMINAL
else:
return False
elif state == TERMINAL:
return False
return state == TERMINAL
def _is_separator(self, c: str) -> bool:
return c in ',;:'
def _is_terminal(self, c: str) -> bool:
return c in '.?!‽'
Complexity
- ⏰ Time complexity:
O(n), where n is the length of the input string. We process each character exactly once. - 🧺 Space complexity:
O(1), as we only use a constant amount of extra space for state tracking.
Method 2 - Pattern Matching with Regular Expressions
Intuition
We can use regular expressions to define the exact pattern that a valid sentence must match. This approach is more concise but potentially less efficient for streaming data.
Approach
- Define a regex pattern that captures all the rules:
- Starts with uppercase letter
- Followed by lowercase letters and valid separators
- Proper spacing rules
- Ends with terminal mark
- Match the entire string against this pattern
- Return true if it matches completely
Code
C++
class Solution {
public:
bool isValidSentence(string s) {
if (s.empty()) return false;
// Pattern explanation:
// ^[A-Z] - starts with capital letter
// ([a-z]+([,;:]?( [a-z]+)*))? - optional word with separators and spaces
// [.?!‽]$ - ends with terminal mark
regex pattern("^[A-Z]([a-z]+([,;:]?( [a-z]+)*)?)*[.?!‽]$");
return regex_match(s, pattern);
}
};
Java
class Solution {
public boolean isValidSentence(String s) {
if (s.isEmpty()) return false;
String pattern = "^[A-Z]([a-z]+([,;:]?( [a-z]+)*)?)*[.?!‽]$";
return s.matches(pattern);
}
}
Python
class Solution:
def is_valid_sentence(self, s: str) -> bool:
if not s:
return False
import re
pattern = r"^[A-Z]([a-z]+([,;:]?( [a-z]+)*)?)*[.?!‽]$"
return bool(re.match(pattern, s))
Complexity
- ⏰ Time complexity:
O(n), where n is the length of the input string. Regular expression matching is typically linear for simple patterns. - 🧺 Space complexity:
O(1), excluding the space used by the regex engine for compilation.
Method 3 - Streaming Character Processor
Intuition
For streaming applications, we need to process characters one by one and maintain state between calls. This approach is most suitable when characters arrive incrementally rather than as a complete string.
Approach
- Maintain a sentence buffer and validation state
- Process each character as it arrives
- When a terminal mark is encountered, validate the complete sentence
- Reset state after processing each sentence
- Handle edge cases like empty sentences or invalid character sequences
Code
C++
class Solution {
private:
string buffer;
bool valid;
public:
Solution() : valid(true) {}
void processChar(char c) {
buffer += c;
if (isTerminal(c)) {
if (valid && isValidComplete()) {
cout << buffer << endl;
}
reset();
} else {
valid = valid && isValidAtPosition(c, buffer.length() - 1);
}
}
private:
bool isValidAtPosition(char c, int pos) {
if (pos == 0) {
return isupper(c);
}
if (pos == 1) {
return islower(c) || c == ' ';
}
char prev = buffer[pos - 1];
if (c == ' ') {
return islower(prev) || isSeparator(prev);
}
if (islower(c)) {
return prev != ' ' || buffer.length() > 2;
}
if (isSeparator(c)) {
return islower(prev);
}
return false;
}
bool isValidComplete() {
if (buffer.length() < 2) return false;
char lastChar = buffer[buffer.length() - 2];
return islower(lastChar);
}
void reset() {
buffer.clear();
valid = true;
}
bool isSeparator(char c) {
return c == ',' || c == ';' || c == ':';
}
bool isTerminal(char c) {
return c == '.' || c == '?' || c == '!' || c == '‽';
}
};
Java
class Solution {
private StringBuilder buffer;
private boolean valid;
public Solution() {
buffer = new StringBuilder();
valid = true;
}
public void processChar(char c) {
buffer.append(c);
if (isTerminal(c)) {
if (valid && isValidComplete()) {
System.out.println(buffer.toString());
}
reset();
} else {
valid = valid && isValidAtPosition(c, buffer.length() - 1);
}
}
private boolean isValidAtPosition(char c, int pos) {
if (pos == 0) {
return Character.isUpperCase(c);
}
if (pos == 1) {
return Character.isLowerCase(c) || c == ' ';
}
char prev = buffer.charAt(pos - 1);
if (c == ' ') {
return Character.isLowerCase(prev) || isSeparator(prev);
}
if (Character.isLowerCase(c)) {
return prev != ' ' || buffer.length() > 2;
}
if (isSeparator(c)) {
return Character.isLowerCase(prev);
}
return false;
}
private boolean isValidComplete() {
if (buffer.length() < 2) return false;
char lastChar = buffer.charAt(buffer.length() - 2);
return Character.isLowerCase(lastChar);
}
private void reset() {
buffer.setLength(0);
valid = true;
}
private boolean isSeparator(char c) {
return c == ',' || c == ';' || c == ':';
}
private boolean isTerminal(char c) {
return c == '.' || c == '?' || c == '!' || c == '‽';
}
}
Python
class Solution:
def __init__(self):
self.buffer: str = ""
self.valid: bool = True
def process_char(self, c: str) -> None:
self.buffer += c
if self._is_terminal(c):
if self.valid and self._is_valid_complete():
print(self.buffer)
self._reset()
else:
self.valid = self.valid and self._is_valid_at_position(c, len(self.buffer) - 1)
def _is_valid_at_position(self, c: str, pos: int) -> bool:
if pos == 0:
return c.isupper()
if pos == 1:
return c.islower() or c == ' '
prev = self.buffer[pos - 1]
if c == ' ':
return prev.islower() or self._is_separator(prev)
if c.islower():
return prev != ' ' or len(self.buffer) > 2
if self._is_separator(c):
return prev.islower()
return False
def _is_valid_complete(self) -> bool:
if len(self.buffer) < 2:
return False
last_char = self.buffer[-2]
return last_char.islower()
def _reset(self) -> None:
self.buffer = ""
self.valid = True
def _is_separator(self, c: str) -> bool:
return c in ',;:'
def _is_terminal(self, c: str) -> bool:
return c in '.?!‽'
Complexity
- ⏰ Time complexity:
O(1)per character,O(n)total for processing a sentence of length n. - 🧺 Space complexity:
O(n), where n is the maximum length of a sentence (for the buffer storage).