1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
use itertools::Itertools as _;
use log::debug;
use std::{io, iter::Peekable};

/// A single source line collected while scanning a run of doc comments and
/// the lines that may sit between them.
enum Attr {
	/// A doc comment line, split into its parts so that the indent of the
	/// comment text can be adjusted before the line is written back.
	Doc {
		/// The indent (leading whitespace) before the doc comment
		indent: String,
		/// The style (e.g. `//!`) of the doc comment, i.e. the three
		/// characters following the indent
		style: String,
		/// The indent of the comment itself, i.e. the whitespace between the
		/// style and the comment text
		comment_indent: String,
		/// The comment trimmed of any leading indent; empty for a doc comment
		/// line that contains nothing but whitespace after the style
		comment: String
	},
	/// A line that is written back unchanged (blank lines, regular comments
	/// and lines starting with `#`).
	Verbatim {
		/// An entire line of source code
		line: String
	}
}

/// Adapter over an iterator of source lines that rewrites runs of doc
/// comments (removing the indent their comment text has in common) and makes
/// the resulting bytes available through its [`io::Read`] implementation.
///
/// The `L: Iterator` bound has to be repeated here because [`Peekable`]
/// itself requires it.
pub struct Preprocessor<L>
where
	L: Iterator<Item = io::Result<String>>
{
	/// Remaining lines to read from the underlying reader
	lines: Peekable<L>,
	/// Buffer of processed lines ready to be read; every line in it is
	/// terminated with `\n`
	buf: Vec<u8>
}

impl<R> Preprocessor<io::Lines<R>>
where
	R: io::BufRead
{
	/// Creates a preprocessor that takes its input line by line from `read`.
	///
	/// The returned value starts out with an empty output buffer.
	pub fn new(read: R) -> Self {
		let lines = read.lines().peekable();
		let buf = Vec::new();
		Self { lines, buf }
	}
}

impl<L> Preprocessor<L>
where
	L: Iterator<Item = io::Result<String>>
{
	/// Processes the next run of lines and appends the result to `self.buf`.
	///
	/// A run consists of consecutive doc comments (`///` or `//!`), blank
	/// lines, regular comments and lines starting with `#`, followed by one
	/// more line that is copied unchanged. The leading whitespace shared by
	/// the text of all non-empty doc comments in the run is removed from
	/// every doc comment of the run; all other lines are written back
	/// verbatim. Every line written is terminated with `\n`.
	///
	/// Nothing is appended once the underlying iterator is exhausted.
	///
	/// # Errors
	///
	/// Returns the first error yielded by the underlying iterator. The lines
	/// collected before the failing one are still appended to the buffer.
	fn fill_buf(&mut self) -> io::Result<()> {
		let mut attrs = Vec::<Attr>::new();
		// an `Err` line ends the run as well; it is reported further down
		while let Some(Ok(line)) = self.lines.peek() {
			let trimmed = line.trim_start();
			// four or more slashes start a regular comment, not a doc comment
			let is_doc = trimmed.starts_with("//!")
				|| (trimmed.starts_with("///") && !trimmed.starts_with("////"));
			// line that might sit between doc comments
			let is_filler = trimmed.is_empty()
				|| trimmed.starts_with("//")
				|| trimmed.starts_with('#');
			if !is_doc && !is_filler {
				// we've encountered the end of the doc comment
				break;
			}

			let line = self
				.lines
				.next()
				.expect("peek() just returned a line")
				.expect("peek() just returned an Ok line");
			if !is_doc {
				attrs.push(Attr::Verbatim { line });
				continue;
			}

			// split `<indent><style><comment_indent><comment>`
			let mut chars = line.chars();
			let indent = chars
				.peeking_take_while(|ch| ch.is_whitespace())
				.collect::<String>();
			let style = (&mut chars).take(3).collect::<String>();
			let comment_indent = chars
				.peeking_take_while(|ch| ch.is_whitespace())
				.collect::<String>();
			let comment = chars.collect::<String>();
			attrs.push(Attr::Doc {
				indent,
				style,
				comment_indent,
				comment
			});
		}

		// The common indent is the longest prefix shared by the indents of all
		// doc comments that have text. Doc comments without text are ignored:
		// they usually have no indent at all and would otherwise reduce the
		// common indent to nothing.
		let mut common_indent: Option<&str> = None;
		for attr in &attrs {
			if let Attr::Doc {
				comment_indent,
				comment,
				..
			} = attr
			{
				if comment.is_empty() {
					continue;
				}
				common_indent = Some(match common_indent {
					None => comment_indent.as_str(),
					Some(common) => {
						// summing up whole characters keeps the cut on a
						// character boundary
						let shared = common
							.chars()
							.zip(comment_indent.chars())
							.take_while(|(lhs, rhs)| lhs == rhs)
							.map(|(ch, _)| ch.len_utf8())
							.sum::<usize>();
						&common[.. shared]
					}
				});
			}
		}
		let common_indent_len = common_indent.map_or(0, str::len);
		debug!(
			"Removing common indent of {common_indent_len} bytes from {} lines",
			attrs.len()
		);

		for attr in attrs {
			match attr {
				Attr::Doc {
					indent,
					style,
					comment_indent,
					comment
				} => {
					self.buf.extend_from_slice(indent.as_bytes());
					self.buf.extend_from_slice(style.as_bytes());
					// Doc comments with text always start with the common
					// indent. Whitespace-only ones might not: `get` yields
					// `None` if their indent is shorter or the cut would split
					// a multi-byte character, in which case the (invisible)
					// indent is dropped instead of emitting invalid UTF-8.
					let remaining_indent =
						comment_indent.get(common_indent_len ..).unwrap_or("");
					self.buf.extend_from_slice(remaining_indent.as_bytes());
					self.buf.extend_from_slice(comment.as_bytes());
				},
				Attr::Verbatim { line } => {
					self.buf.extend_from_slice(line.as_bytes());
				}
			}
			self.buf.push(b'\n');
		}

		// the next line should not be part of the doc comment
		if let Some(line) = self.lines.next() {
			self.buf.extend_from_slice(line?.as_bytes());
			self.buf.push(b'\n');
		}

		Ok(())
	}
}

impl<L> io::Read for Preprocessor<L>
where
	L: Iterator<Item = io::Result<String>>
{
	/// Copies processed bytes into `buf` and returns how many were written.
	///
	/// The internal buffer is refilled through `fill_buf` only when it is
	/// empty, so a single call never returns more than what one refill
	/// produced. `Ok(0)` is returned if `buf` is empty or if the refill did
	/// not produce any data.
	///
	/// # Errors
	///
	/// Returns the error reported by `fill_buf` while refilling the buffer.
	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
		if self.buf.is_empty() {
			self.fill_buf()?;
		}

		let bytes = buf.len().min(self.buf.len());
		buf[.. bytes].copy_from_slice(&self.buf[.. bytes]);
		// remove the consumed prefix in place instead of allocating a new
		// vector for the remainder on every call
		self.buf.drain(.. bytes);
		Ok(bytes)
	}
}