Problem

Since XML is very verbose, you are given a way of encoding it where each tag gets mapped to a pre-defined integer value. The language/grammar is as follows:

1
2
3
4
5
Element   --> Tag Attributes END Children END
Attribute --> Tag Value
END       --> 0
Tag       --> some predefined mapping to int
Value     --> string value

Implement an encoder that takes a parsed element (tag, attributes, children) and emits the integer and string tokens in the order described. The encoder must match the example encoding below.

Examples

Example 1

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
Input: xml =
<family lastName="Coder" state="CA">
    <person firstName="K5kc">Hello World</person>
</family>
Output: [1, 4, "Coder", 5, "CA", 0, 2, 3, "K5kc", 0, "Hello World", 0, 0]
Explanation: 
Assuming mapping: family->1, person->2, lastName->4, firstName->3, state->5, above output is generated from encode function as tokens.

# 1 -- family tag
# 4 'Coder' 0 -- attribute lastName=Coder
# 5 'CA' 0 -- attribute state=CA
# 0 -- end of attributes
# 2 -- child person tag
# 3 'K5kc' 0 -- attribute firstName=K5kc
# 0 -- end of attributes for person
# 'Hello World' 0 -- text child for person
# 0 -- end of person element
# 0 -- end of family element

Follow up

Can you write the decoder for the same.

Solution

Method 1 - Recursion

Intuition

Keeping encode and decode together in the same Solution (or Codec) object makes the contract explicit and mirrors common interview patterns (e.g., LeetCode’s Codec for trees). The same token mappings and helper routines are shared, reducing duplication and avoiding mismatched conventions between serializer and parser.

Approach

  1. Keep a single Solution class that stores the forward token: name->int and reverse rev: int->name mappings.
  2. Implement encode(root) using a preorder traversal that emits tokens and 0 sentinels (same logic as before).
  3. Implement decode(tokens) as a recursive parser that consumes tokens with a cursor and reconstructs Element objects.
  4. Both functions share the same invariants about how attributes and children are delimited.

Complexity

  • Time complexity: O(n + s) – Each token is processed once across encoding/decoding; n is number of element tags and s is total string length.
  • 🧺 Space complexity: O(h + s) – recursion stack and output/tree storage.

Code

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <string>
#include <vector>
#include <unordered_map>

using std::string;
using std::vector;
using std::unordered_map;

struct Element {
  string tag;
  vector<std::pair<string,string>> attributes;
  vector<Element> children;
};

class Solution {
public:
  unordered_map<string,int> token;
  unordered_map<int,string> rev;

  vector<string> encode(const Element &root) {
    vector<string> out;
    dfs_encode(root, out);
    return out;
  }

  Element decode(const vector<string> &tokens) {
    this->tokens = tokens;
    idx = 0;
    return parse();
  }

private:
  vector<string> tokens;
  int idx = 0;

  void dfs_encode(const Element &e, vector<string> &out) {
    out.push_back(std::to_string(token.at(e.tag)));
    for (auto &p : e.attributes) {
      out.push_back(std::to_string(token.at(p.first)));
      out.push_back(p.second);
      out.push_back("0");
    }
    out.push_back("0");
    for (auto &c : e.children) dfs_encode(c, out);
    out.push_back("0");
  }

  Element parse() {
    Element e;
    int t = stoi(tokens[idx++]);
    e.tag = rev.at(t);
    while (tokens[idx] != "0") {
      int at = stoi(tokens[idx++]);
      string val;
      while (tokens[idx] != "0") { val += tokens[idx++]; if (tokens[idx] != "0") val += " "; }
      idx++; // consume attribute-string terminator 0
      e.attributes.emplace_back(rev.at(at), val);
    }
    idx++; // end attributes
    while (tokens[idx] != "0") e.children.push_back(parse());
    idx++; // end element
    return e;
  }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
package solution

import (
    "fmt"
    "strings"
)

type Element struct {
    Tag        string
    Attributes [][2]string
    Children   []Element
}

type Solution struct {
    Token map[string]int
    Rev   map[int]string
    tokens []string
    idx    int
}

func (s *Solution) Encode(root Element) []string {
    var out []string
    var dfs func(e Element)
    dfs = func(e Element) {
        out = append(out, fmt.Sprintf("%d", s.Token[e.Tag]))
        for _, a := range e.Attributes {
            out = append(out, fmt.Sprintf("%d", s.Token[a[0]]))
            out = append(out, a[1])
            out = append(out, "0")
        }
        out = append(out, "0")
        for _, c := range e.Children { dfs(c) }
        out = append(out, "0")
    }
    dfs(root)
    return out
}

func (s *Solution) Decode(tokens []string) Element {
    s.tokens = tokens
    s.idx = 0
    return s.parse()
}

func (s *Solution) parse() Element {
    t := atoi(s.tokens[s.idx]); s.idx++
    e := Element{Tag: s.Rev[t]}
    for s.tokens[s.idx] != "0" {
        at := atoi(s.tokens[s.idx]); s.idx++
        var valParts []string
        for s.tokens[s.idx] != "0" {
            valParts = append(valParts, s.tokens[s.idx]); s.idx++
        }
        s.idx++
        e.Attributes = append(e.Attributes, [2]string{s.Rev[at], strings.Join(valParts, " ")})
    }
    s.idx++
    for s.tokens[s.idx] != "0" {
        e.Children = append(e.Children, s.parse())
    }
    s.idx++
    return e
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import java.util.*;

class Element {
    String tag;
    List<Map.Entry<String,String>> attributes = new ArrayList<>();
    List<Element> children = new ArrayList<>();
}

public class Solution {
    Map<String,Integer> token = new HashMap<>();
    Map<Integer,String> rev = new HashMap<>();
    List<String> tokens;
    int idx = 0;

    public List<String> encode(Element root) {
        List<String> out = new ArrayList<>();
        dfsEncode(root, out);
        return out;
    }

    public Element decode(List<String> tokens) {
        this.tokens = tokens; this.idx = 0;
        return parse();
    }

    private void dfsEncode(Element e, List<String> out) {
        out.add(String.valueOf(token.get(e.tag)));
        for (Map.Entry<String,String> a : e.attributes) {
            out.add(String.valueOf(token.get(a.getKey())));
            out.add(a.getValue());
            out.add("0");
        }
        out.add("0");
        for (Element c : e.children) dfsEncode(c, out);
        out.add("0");
    }

    private Element parse() {
        Element e = new Element();
        int t = Integer.parseInt(tokens.get(idx++));
        e.tag = rev.get(t);
        while (!tokens.get(idx).equals("0")) {
            int at = Integer.parseInt(tokens.get(idx++));
            StringBuilder val = new StringBuilder();
            while (!tokens.get(idx).equals("0")) val.append(tokens.get(idx++)).append(' ');
            idx++;
            e.attributes.add(new AbstractMap.SimpleEntry<>(rev.get(at), val.toString().trim()));
        }
        idx++;
        while (!tokens.get(idx).equals("0")) e.children.add(parse());
        idx++;
        return e;
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
data class Element(
    val tag: String,
    var attributes: List<Pair<String,String>> = listOf(),
    var children: List<Element> = listOf()
)

class Solution(val token: Map<String, Int>, val rev: Map<Int,String>) {
    lateinit var tokens: List<String>
    var idx = 0

    fun encode(root: Element): List<String> {
        val out = mutableListOf<String>()
        fun dfs(e: Element) {
            out += token[e.tag].toString()
            for ((k, v) in e.attributes) {
                out += token[k].toString()
                out += v
                out += "0"
            }
            out += "0"
            for (c in e.children) dfs(c)
            out += "0"
        }
        dfs(root)
        return out
    }

    fun decode(tokens: List<String>): Element {
        this.tokens = tokens; idx = 0
        return parse()
    }

    private fun parse(): Element {
        val t = tokens[idx++].toInt()
        val e = Element(rev[t]!!)
        while (tokens[idx] != "0") {
            val at = tokens[idx++].toInt()
            val parts = mutableListOf<String>()
            while (tokens[idx] != "0") parts.add(tokens[idx++])
            idx++
            e.attributes = e.attributes + Pair(rev[at]!!, parts.joinToString(" "))
        }
        idx++
        val children = mutableListOf<Element>()
        while (tokens[idx] != "0") children += parse()
        idx++
        e.children = children
        return e
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from __future__ import annotations
from typing import List, Tuple, Dict

class Element:
    def __init__(self, tag: str, attributes: List[Tuple[str,str]] = None, children: List['Element'] = None):
        self.tag = tag
        self.attributes = attributes or []
        self.children = children or []

class Solution:
    def __init__(self, token: Dict[str,int]):
        self.token = token
        self.rev = {v:k for k,v in token.items()}
        self.tokens: List[str] = []
        self.i = 0

    def encode(self, root: Element) -> List[str]:
        out: List[str] = []

        def dfs(e: Element) -> None:
            out.append(str(self.token[e.tag]))
            for (k, v) in e.attributes:
                out.append(str(self.token[k]))
                out.append(v)
                out.append("0")
            out.append("0")
            for c in e.children:
                dfs(c)
            out.append("0")

        dfs(root)
        return out

    def decode(self, tokens: List[str]) -> Element:
        self.tokens = tokens
        self.i = 0
        return self._parse()

    def _parse(self) -> Element:
        t = int(self.tokens[self.i]); self.i += 1
        e = Element(self.rev[t])
        # attributes
        while self.tokens[self.i] != '0':
            at = int(self.tokens[self.i]); self.i += 1
            parts = []
            while self.tokens[self.i] != '0':
                parts.append(self.tokens[self.i]); self.i += 1
            self.i += 1
            e.attributes.append((self.rev[at], ' '.join(parts)))
        self.i += 1
        # children
        while self.tokens[self.i] != '0':
            e.children.append(self._parse())
        self.i += 1
        return e
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
use std::collections::HashMap;

pub struct Element {
    pub tag: String,
    pub attributes: Vec<(String,String)>,
    pub children: Vec<Element>,
}

pub struct Solution {
    pub token: HashMap<String,i32>,
    pub rev: HashMap<i32,String>,
}

impl Solution {
    pub fn encode(&self, root: &Element) -> Vec<String> {
        let mut out: Vec<String> = Vec::new();
        fn dfs(s: &Solution, e: &Element, out: &mut Vec<String>) {
            out.push(s.token.get(&e.tag).unwrap().to_string());
            for (k,v) in &e.attributes {
                out.push(s.token.get(k).unwrap().to_string());
                out.push(v.clone());
                out.push("0".to_string());
            }
            out.push("0".to_string());
            for c in &e.children { dfs(s,c,out); }
            out.push("0".to_string());
        }
        dfs(self, root, &mut out);
        out
    }

    pub fn decode(&self, tokens: &[String]) -> Element {
        let mut idx = 0usize;
        fn parse(s: &Solution, tokens: &[String], idx: &mut usize) -> Element {
            let t = tokens[*idx].parse::<i32>().unwrap(); *idx += 1;
            let mut e = Element { tag: s.rev.get(&t).unwrap().clone(), attributes: vec![], children: vec![] };
            while tokens[*idx] != "0" {
                let at = tokens[*idx].parse::<i32>().unwrap(); *idx += 1;
                let mut parts = Vec::new();
                while tokens[*idx] != "0" { parts.push(tokens[*idx].clone()); *idx += 1; }
                *idx += 1;
                e.attributes.push((s.rev.get(&at).unwrap().clone(), parts.join(" ")));
            }
            *idx += 1;
            while tokens[*idx] != "0" { e.children.push(parse(s, tokens, idx)); }
            *idx += 1;
            e
        }
        parse(self, tokens, &mut idx)
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
type Attribute = [string,string];
type Element = { tag: string; attributes?: Attribute[]; children?: Element[] };

class Solution {
  token: Record<string, number>;
  rev: Record<number, string>;
  tokens: string[] = [];
  idx = 0;
  constructor(token: Record<string, number>, rev: Record<number,string>) { this.token = token; this.rev = rev; }

  encode(root: Element): string[] {
    const out: string[] = [];
    const dfs = (e: Element) => {
      out.push(String(this.token[e.tag]));
      for (const a of e.attributes || []) {
        out.push(String(this.token[a[0]]));
        out.push(a[1]);
        out.push('0');
      }
      out.push('0');
      for (const c of e.children || []) dfs(c);
      out.push('0');
    };
    dfs(root);
    return out;
  }

  decode(tokens: string[]): Element {
    this.tokens = tokens; this.idx = 0;
    return this.parse();
  }

  private parse(): Element {
    const t = Number(this.tokens[this.idx++]);
    const e: Element = { tag: this.rev[t], attributes: [], children: [] };
    while (this.tokens[this.idx] !== '0') {
      const at = Number(this.tokens[this.idx++]);
      const parts: string[] = [];
      while (this.tokens[this.idx] !== '0') parts.push(this.tokens[this.idx++]);
      this.idx++;
      e.attributes.push([this.rev[at], parts.join(' ')]);
    }
    this.idx++;
    while (this.tokens[this.idx] !== '0') e.children.push(this.parse());
    this.idx++;
    return e;
  }
}