Encode XML
MediumUpdated: Oct 17, 2025
Problem
Since XML is very verbose, you are given a way of encoding it where each tag gets mapped to a pre-defined integer value. The language/grammar is as follows:
Element --> Tag Attributes END Children END
Attribute --> Tag Value
END --> 0
Tag --> some predefined mapping to int
Value --> string value
Implement an encoder that takes a parsed element (tag, attributes, children) and emits the integer and string tokens in the order described. The encoder must match the example encoding below.
Examples
Example 1
Input: xml =
<family lastName="Coder" state="CA">
<person firstName="K5kc">Hello World</person>
</family>
Output: [1, 4, "Coder", 5, "CA", 0, 2, 3, "K5kc", 0, "Hello World", 0, 0]
Explanation:
Assuming mapping: family->1, person->2, lastName->4, firstName->3, state->5, above output is generated from encode function as tokens.
# 1 -- family tag
# 4 'Coder' 0 -- attribute lastName=Coder
# 5 'CA' 0 -- attribute state=CA
# 0 -- end of attributes
# 2 -- child person tag
# 3 'K5kc' 0 -- attribute firstName=K5kc
# 0 -- end of attributes for person
# 'Hello World' 0 -- text child for person
# 0 -- end of person element
# 0 -- end of family element
Follow up
Can you write the decoder for the same.
Solution
Method 1 - Recursion
Intuition
Keeping encode and decode together in the same Solution (or Codec) object makes the contract explicit and mirrors common interview patterns (e.g., LeetCode's Codec for trees). The same token mappings and helper routines are shared, reducing duplication and avoiding mismatched conventions between serializer and parser.
Approach
- Keep a single
Solutionclass that stores the forwardtoken: name->intand reverserev: int->namemappings. - Implement
encode(root)using a preorder traversal that emits tokens and0sentinels (same logic as before). - Implement
decode(tokens)as a recursive parser that consumes tokens with a cursor and reconstructsElementobjects. - Both functions share the same invariants about how attributes and children are delimited.
Complexity
- ⏰ Time complexity:
O(n + s)– Each token is processed once across encoding/decoding;nis number of element tags andsis total string length. - 🧺 Space complexity:
O(h + s)– recursion stack and output/tree storage.
Code
C++
#include <string>
#include <vector>
#include <unordered_map>
using std::string;
using std::vector;
using std::unordered_map;
struct Element {
string tag;
vector<std::pair<string,string>> attributes;
vector<Element> children;
};
class Solution {
public:
unordered_map<string,int> token;
unordered_map<int,string> rev;
vector<string> encode(const Element &root) {
vector<string> out;
dfs_encode(root, out);
return out;
}
Element decode(const vector<string> &tokens) {
this->tokens = tokens;
idx = 0;
return parse();
}
private:
vector<string> tokens;
int idx = 0;
void dfs_encode(const Element &e, vector<string> &out) {
out.push_back(std::to_string(token.at(e.tag)));
for (auto &p : e.attributes) {
out.push_back(std::to_string(token.at(p.first)));
out.push_back(p.second);
out.push_back("0");
}
out.push_back("0");
for (auto &c : e.children) dfs_encode(c, out);
out.push_back("0");
}
Element parse() {
Element e;
int t = stoi(tokens[idx++]);
e.tag = rev.at(t);
while (tokens[idx] != "0") {
int at = stoi(tokens[idx++]);
string val;
while (tokens[idx] != "0") { val += tokens[idx++]; if (tokens[idx] != "0") val += " "; }
idx++; // consume attribute-string terminator 0
e.attributes.emplace_back(rev.at(at), val);
}
idx++; // end attributes
while (tokens[idx] != "0") e.children.push_back(parse());
idx++; // end element
return e;
}
};
Go
package solution
import (
"fmt"
"strings"
)
type Element struct {
Tag string
Attributes [][2]string
Children []Element
}
type Solution struct {
Token map[string]int
Rev map[int]string
tokens []string
idx int
}
func (s *Solution) Encode(root Element) []string {
var out []string
var dfs func(e Element)
dfs = func(e Element) {
out = append(out, fmt.Sprintf("%d", s.Token[e.Tag]))
for _, a := range e.Attributes {
out = append(out, fmt.Sprintf("%d", s.Token[a[0]]))
out = append(out, a[1])
out = append(out, "0")
}
out = append(out, "0")
for _, c := range e.Children { dfs(c) }
out = append(out, "0")
}
dfs(root)
return out
}
func (s *Solution) Decode(tokens []string) Element {
s.tokens = tokens
s.idx = 0
return s.parse()
}
func (s *Solution) parse() Element {
t := atoi(s.tokens[s.idx]); s.idx++
e := Element{Tag: s.Rev[t]}
for s.tokens[s.idx] != "0" {
at := atoi(s.tokens[s.idx]); s.idx++
var valParts []string
for s.tokens[s.idx] != "0" {
valParts = append(valParts, s.tokens[s.idx]); s.idx++
}
s.idx++
e.Attributes = append(e.Attributes, [2]string{s.Rev[at], strings.Join(valParts, " ")})
}
s.idx++
for s.tokens[s.idx] != "0" {
e.Children = append(e.Children, s.parse())
}
s.idx++
return e
}
Java
import java.util.*;
class Element {
String tag;
List<Map.Entry<String,String>> attributes = new ArrayList<>();
List<Element> children = new ArrayList<>();
}
public class Solution {
Map<String,Integer> token = new HashMap<>();
Map<Integer,String> rev = new HashMap<>();
List<String> tokens;
int idx = 0;
public List<String> encode(Element root) {
List<String> out = new ArrayList<>();
dfsEncode(root, out);
return out;
}
public Element decode(List<String> tokens) {
this.tokens = tokens; this.idx = 0;
return parse();
}
private void dfsEncode(Element e, List<String> out) {
out.add(String.valueOf(token.get(e.tag)));
for (Map.Entry<String,String> a : e.attributes) {
out.add(String.valueOf(token.get(a.getKey())));
out.add(a.getValue());
out.add("0");
}
out.add("0");
for (Element c : e.children) dfsEncode(c, out);
out.add("0");
}
private Element parse() {
Element e = new Element();
int t = Integer.parseInt(tokens.get(idx++));
e.tag = rev.get(t);
while (!tokens.get(idx).equals("0")) {
int at = Integer.parseInt(tokens.get(idx++));
StringBuilder val = new StringBuilder();
while (!tokens.get(idx).equals("0")) val.append(tokens.get(idx++)).append(' ');
idx++;
e.attributes.add(new AbstractMap.SimpleEntry<>(rev.get(at), val.toString().trim()));
}
idx++;
while (!tokens.get(idx).equals("0")) e.children.add(parse());
idx++;
return e;
}
}
Kotlin
data class Element(
val tag: String,
var attributes: List<Pair<String,String>> = listOf(),
var children: List<Element> = listOf()
)
class Solution(val token: Map<String, Int>, val rev: Map<Int,String>) {
lateinit var tokens: List<String>
var idx = 0
fun encode(root: Element): List<String> {
val out = mutableListOf<String>()
fun dfs(e: Element) {
out += token[e.tag].toString()
for ((k, v) in e.attributes) {
out += token[k].toString()
out += v
out += "0"
}
out += "0"
for (c in e.children) dfs(c)
out += "0"
}
dfs(root)
return out
}
fun decode(tokens: List<String>): Element {
this.tokens = tokens; idx = 0
return parse()
}
private fun parse(): Element {
val t = tokens[idx++].toInt()
val e = Element(rev[t]!!)
while (tokens[idx] != "0") {
val at = tokens[idx++].toInt()
val parts = mutableListOf<String>()
while (tokens[idx] != "0") parts.add(tokens[idx++])
idx++
e.attributes = e.attributes + Pair(rev[at]!!, parts.joinToString(" "))
}
idx++
val children = mutableListOf<Element>()
while (tokens[idx] != "0") children += parse()
idx++
e.children = children
return e
}
}
Python
from __future__ import annotations
from typing import List, Tuple, Dict
class Element:
def __init__(self, tag: str, attributes: List[Tuple[str,str]] = None, children: List['Element'] = None):
self.tag = tag
self.attributes = attributes or []
self.children = children or []
class Solution:
def __init__(self, token: Dict[str,int]):
self.token = token
self.rev = {v:k for k,v in token.items()}
self.tokens: List[str] = []
self.i = 0
def encode(self, root: Element) -> List[str]:
out: List[str] = []
def dfs(e: Element) -> None:
out.append(str(self.token[e.tag]))
for (k, v) in e.attributes:
out.append(str(self.token[k]))
out.append(v)
out.append("0")
out.append("0")
for c in e.children:
dfs(c)
out.append("0")
dfs(root)
return out
def decode(self, tokens: List[str]) -> Element:
self.tokens = tokens
self.i = 0
return self._parse()
def _parse(self) -> Element:
t = int(self.tokens[self.i]); self.i += 1
e = Element(self.rev[t])
# attributes
while self.tokens[self.i] != '0':
at = int(self.tokens[self.i]); self.i += 1
parts = []
while self.tokens[self.i] != '0':
parts.append(self.tokens[self.i]); self.i += 1
self.i += 1
e.attributes.append((self.rev[at], ' '.join(parts)))
self.i += 1
# children
while self.tokens[self.i] != '0':
e.children.append(self._parse())
self.i += 1
return e
Rust
use std::collections::HashMap;
pub struct Element {
pub tag: String,
pub attributes: Vec<(String,String)>,
pub children: Vec<Element>,
}
pub struct Solution {
pub token: HashMap<String,i32>,
pub rev: HashMap<i32,String>,
}
impl Solution {
pub fn encode(&self, root: &Element) -> Vec<String> {
let mut out: Vec<String> = Vec::new();
fn dfs(s: &Solution, e: &Element, out: &mut Vec<String>) {
out.push(s.token.get(&e.tag).unwrap().to_string());
for (k,v) in &e.attributes {
out.push(s.token.get(k).unwrap().to_string());
out.push(v.clone());
out.push("0".to_string());
}
out.push("0".to_string());
for c in &e.children { dfs(s,c,out); }
out.push("0".to_string());
}
dfs(self, root, &mut out);
out
}
pub fn decode(&self, tokens: &[String]) -> Element {
let mut idx = 0usize;
fn parse(s: &Solution, tokens: &[String], idx: &mut usize) -> Element {
let t = tokens[*idx].parse::<i32>().unwrap(); *idx += 1;
let mut e = Element { tag: s.rev.get(&t).unwrap().clone(), attributes: vec![], children: vec![] };
while tokens[*idx] != "0" {
let at = tokens[*idx].parse::<i32>().unwrap(); *idx += 1;
let mut parts = Vec::new();
while tokens[*idx] != "0" { parts.push(tokens[*idx].clone()); *idx += 1; }
*idx += 1;
e.attributes.push((s.rev.get(&at).unwrap().clone(), parts.join(" ")));
}
*idx += 1;
while tokens[*idx] != "0" { e.children.push(parse(s, tokens, idx)); }
*idx += 1;
e
}
parse(self, tokens, &mut idx)
}
}
Typescript
type Attribute = [string,string];
type Element = { tag: string; attributes?: Attribute[]; children?: Element[] };
class Solution {
token: Record<string, number>;
rev: Record<number, string>;
tokens: string[] = [];
idx = 0;
constructor(token: Record<string, number>, rev: Record<number,string>) { this.token = token; this.rev = rev; }
encode(root: Element): string[] {
const out: string[] = [];
const dfs = (e: Element) => {
out.push(String(this.token[e.tag]));
for (const a of e.attributes || []) {
out.push(String(this.token[a[0]]));
out.push(a[1]);
out.push('0');
}
out.push('0');
for (const c of e.children || []) dfs(c);
out.push('0');
};
dfs(root);
return out;
}
decode(tokens: string[]): Element {
this.tokens = tokens; this.idx = 0;
return this.parse();
}
private parse(): Element {
const t = Number(this.tokens[this.idx++]);
const e: Element = { tag: this.rev[t], attributes: [], children: [] };
while (this.tokens[this.idx] !== '0') {
const at = Number(this.tokens[this.idx++]);
const parts: string[] = [];
while (this.tokens[this.idx] !== '0') parts.push(this.tokens[this.idx++]);
this.idx++;
e.attributes.push([this.rev[at], parts.join(' ')]);
}
this.idx++;
while (this.tokens[this.idx] !== '0') e.children.push(this.parse());
this.idx++;
return e;
}
}