fecfb7a20053e5d793ede409eb3f034c99e375d5
[openwrt/staging/linusw.git] /
1 From d24229dcef58e0162780ceffa02eb5f6a01b9a4d Mon Sep 17 00:00:00 2001
2 From: Jonathan Bell <jonathan@raspberrypi.com>
3 Date: Tue, 16 Jul 2024 16:47:08 +0100
4 Subject: [PATCH 1185/1215] pinctrl: rp1: jump through hoops to avoid PCIe
5 latency issues
6
7 Automatic link power saving plus the ability of a root complex to buffer
8 pending posted write transfers (and consider them complete before being
9 transmitted on the wire) causes compression of updates to GPIO state.
10
11 The large bandwidth of a Gen 2 x4 link means the writes toggle state
12 inside RP1 as fast as it can go (~20MHz), which is bad for applications
13 wanting to bitbash with at least a few microseconds of delay between
14 updates.
15
16 By tailoring IO access patterns to a special Root Complex register,
17 writes to GPIOs can be stalled until the link wakes - meaning all writes
18 end up with a reasonably consistent minimum pacing (~200ns).
19
20 Additionally, write barriers have no effect other than to arbitrarily
21 delay some writes by a small, variable amount - so remove the vast
22 majority of these in areas that could be hot-paths.
23
24 Although the IO memory is mapped with Device strongly-ordered semantics,
25 this doesn't prevent the splitter inside BCM2712 from letting an MMIO
26 read request to a GPIO register get ahead of the pacing writes to the
27 Root Complex register. So each pin state read must flush writes out to
28 the Outer-Shareable domain.
29
30 Signed-off-by: Jonathan Bell <jonathan@raspberrypi.com>
31 ---
32 drivers/pinctrl/pinctrl-rp1.c | 120 +++++++++++++++++++++++++++++-----
33 1 file changed, 105 insertions(+), 15 deletions(-)
34
35 --- a/drivers/pinctrl/pinctrl-rp1.c
36 +++ b/drivers/pinctrl/pinctrl-rp1.c
37 @@ -197,6 +197,7 @@ struct rp1_pin_info {
38 void __iomem *inte;
39 void __iomem *ints;
40 void __iomem *pad;
41 + void __iomem *dummy;
42 };
43
44 enum funcs {
45 @@ -276,6 +277,7 @@ struct rp1_pinctrl {
46 void __iomem *gpio_base;
47 void __iomem *rio_base;
48 void __iomem *pads_base;
49 + void __iomem *dummy_base;
50 int irq[RP1_NUM_BANKS];
51 struct rp1_pin_info pins[RP1_NUM_GPIOS];
52
53 @@ -577,6 +579,42 @@ static bool persist_gpio_outputs = true;
54 module_param(persist_gpio_outputs, bool, 0644);
55 MODULE_PARM_DESC(persist_gpio_outputs, "Enable GPIO_OUT persistence when pin is freed");
56
57 +static bool pace_pin_updates = true;
58 +module_param(pace_pin_updates, bool, 0644);
59 +MODULE_PARM_DESC(pace_pin_updates, "Update pin states with guaranteed monotonicity if PCIe ASPM is enabled");
60 +
61 +static inline void rp1_pin_writel(u32 val, void __iomem *dummy, void __iomem *reg)
62 +{
63 + unsigned long flags;
64 +
65 + local_irq_save(flags);
66 + /*
67 + * Issuing 6 pipelined writes to the RC's Slot Control register will stall the
68 + * peripheral bus inside 2712 if the link is in L1. This acts as a lightweight
69 + * "fence" operation preventing back-to-back writes arriving at RP1 on a wake.
70 + */
71 + if (dummy) {
72 + writel_relaxed(0, dummy);
73 + writel_relaxed(0, dummy);
74 + writel_relaxed(0, dummy);
75 + writel_relaxed(0, dummy);
76 + writel_relaxed(0, dummy);
77 + writel_relaxed(0, dummy);
78 + }
79 + writel_relaxed(val, reg);
80 + local_irq_restore(flags);
81 +}
82 +
83 +static inline u32 rp1_pin_readl(const void __iomem *ioaddr)
84 +{
85 + /*
86 + * Prior posted writes may not yet have been emitted by the CPU - do a store-flush
87 + * before reading GPIO state, as this will serialise writes versus the next issued read.
88 + */
89 + __dma_wmb();
90 + return readl(ioaddr);
91 +}
92 +
93 static int rp1_pinconf_set(struct pinctrl_dev *pctldev,
94 unsigned int offset, unsigned long *configs,
95 unsigned int num_configs);
96 @@ -603,12 +641,12 @@ static struct rp1_pin_info *rp1_get_pin_
97
98 static void rp1_pad_update(struct rp1_pin_info *pin, u32 clr, u32 set)
99 {
100 - u32 padctrl = readl(pin->pad);
101 + u32 padctrl = rp1_pin_readl(pin->pad);
102
103 padctrl &= ~clr;
104 padctrl |= set;
105
106 - writel(padctrl, pin->pad);
107 + rp1_pin_writel(padctrl, pin->dummy, pin->pad);
108 }
109
110 static void rp1_input_enable(struct rp1_pin_info *pin, int value)
111 @@ -625,7 +663,7 @@ static void rp1_output_enable(struct rp1
112
113 static u32 rp1_get_fsel(struct rp1_pin_info *pin)
114 {
115 - u32 ctrl = readl(pin->gpio + RP1_GPIO_CTRL);
116 + u32 ctrl = rp1_pin_readl(pin->gpio + RP1_GPIO_CTRL);
117 u32 oeover = FLD_GET(ctrl, RP1_GPIO_CTRL_OEOVER);
118 u32 fsel = FLD_GET(ctrl, RP1_GPIO_CTRL_FUNCSEL);
119
120 @@ -637,7 +675,7 @@ static u32 rp1_get_fsel(struct rp1_pin_i
121
122 static void rp1_set_fsel(struct rp1_pin_info *pin, u32 fsel)
123 {
124 - u32 ctrl = readl(pin->gpio + RP1_GPIO_CTRL);
125 + u32 ctrl = rp1_pin_readl(pin->gpio + RP1_GPIO_CTRL);
126
127 if (fsel >= RP1_FSEL_COUNT)
128 fsel = RP1_FSEL_NONE_HW;
129 @@ -652,12 +690,12 @@ static void rp1_set_fsel(struct rp1_pin_
130 FLD_SET(ctrl, RP1_GPIO_CTRL_OEOVER, RP1_OEOVER_PERI);
131 }
132 FLD_SET(ctrl, RP1_GPIO_CTRL_FUNCSEL, fsel);
133 - writel(ctrl, pin->gpio + RP1_GPIO_CTRL);
134 + rp1_pin_writel(ctrl, pin->dummy, pin->gpio + RP1_GPIO_CTRL);
135 }
136
137 static int rp1_get_dir(struct rp1_pin_info *pin)
138 {
139 - return !(readl(pin->rio + RP1_RIO_OE) & (1 << pin->offset)) ?
140 + return !(rp1_pin_readl(pin->rio + RP1_RIO_OE) & (1 << pin->offset)) ?
141 RP1_DIR_INPUT : RP1_DIR_OUTPUT;
142 }
143
144 @@ -665,19 +703,19 @@ static void rp1_set_dir(struct rp1_pin_i
145 {
146 int offset = is_input ? RP1_CLR_OFFSET : RP1_SET_OFFSET;
147
148 - writel(1 << pin->offset, pin->rio + RP1_RIO_OE + offset);
149 + rp1_pin_writel(1 << pin->offset, pin->dummy, pin->rio + RP1_RIO_OE + offset);
150 }
151
152 static int rp1_get_value(struct rp1_pin_info *pin)
153 {
154 - return !!(readl(pin->rio + RP1_RIO_IN) & (1 << pin->offset));
155 + return !!(rp1_pin_readl(pin->rio + RP1_RIO_IN) & (1 << pin->offset));
156 }
157
158 static void rp1_set_value(struct rp1_pin_info *pin, int value)
159 {
160 /* Assume the pin is already an output */
161 - writel(1 << pin->offset,
162 - pin->rio + RP1_RIO_OUT + (value ? RP1_SET_OFFSET : RP1_CLR_OFFSET));
163 + rp1_pin_writel(1 << pin->offset, pin->dummy,
164 + pin->rio + RP1_RIO_OUT + (value ? RP1_SET_OFFSET : RP1_CLR_OFFSET));
165 }
166
167 static int rp1_gpio_get(struct gpio_chip *chip, unsigned offset)
168 @@ -1298,7 +1336,7 @@ static const struct pinmux_ops rp1_pmx_o
169
170 static void rp1_pull_config_set(struct rp1_pin_info *pin, unsigned int arg)
171 {
172 - u32 padctrl = readl(pin->pad);
173 + u32 padctrl = rp1_pin_readl(pin->pad);
174
175 FLD_SET(padctrl, RP1_PAD_PULL, arg & 0x3);
176
177 @@ -1398,7 +1436,7 @@ static int rp1_pinconf_get(struct pinctr
178 if (!pin)
179 return -EINVAL;
180
181 - padctrl = readl(pin->pad);
182 + padctrl = rp1_pin_readl(pin->pad);
183
184 switch (param) {
185 case PIN_CONFIG_INPUT_ENABLE:
186 @@ -1493,6 +1531,7 @@ static int rp1_pinctrl_probe(struct plat
187 {
188 struct device *dev = &pdev->dev;
189 struct device_node *np = dev->of_node;
190 + struct device_node *rp1_node = NULL;
191 struct rp1_pinctrl *pc;
192 struct gpio_irq_chip *girq;
193 int err, i;
194 @@ -1528,6 +1567,40 @@ static int rp1_pinctrl_probe(struct plat
195 pc->gpio_chip = rp1_gpio_chip;
196 pc->gpio_chip.parent = dev;
197
198 + /*
199 + * Workaround for the vagaries of PCIe on BCM2712
200 + *
201 + * If the link to RP1 is in L1, then the BRCMSTB RC will buffer many
202 + * outbound writes - and generate write responses for them, despite the
203 + * fact that the link is not yet active. This has the effect of compressing
204 + * multiple writes to GPIOs together, destroying any pacing that an application
205 + * may require in the 1-10us range.
206 + *
207 + * The RC Slot Control configuration register is special. It emits a
208 + * MsgD for every write to it, and will stall further writes until the message
209 + * goes out on the wire. This can be (ab)used to force CPU stalls when the
210 + * link is inactive, at the cost of a small amount of downstream bandwidth
211 + * and some 200ns of added latency for each write.
212 + *
213 + * Several back-to-back configuration writes are necessary to "fill the pipe",
214 + * otherwise the outbound MAC can consume a pending MMIO write and reorder
215 + * it with respect to the config writes - undoing the intent.
216 + *
217 + * of_iomap() is used directly here as the address overlaps with the RC driver's
218 + * usage.
219 + */
220 + rp1_node = of_find_node_by_name(NULL, "rp1");
221 + if (!rp1_node)
222 + dev_err(&pdev->dev, "failed to find RP1 DT node\n");
223 + else if (pace_pin_updates &&
224 + of_device_is_compatible(rp1_node->parent, "brcm,bcm2712-pcie")) {
225 + pc->dummy_base = of_iomap(rp1_node->parent, 0);
226 + if (IS_ERR(pc->dummy_base)) {
227 + dev_warn(&pdev->dev, "could not map bcm2712 root complex registers\n");
228 + pc->dummy_base = NULL;
229 + }
230 + }
231 +
232 for (i = 0; i < RP1_NUM_BANKS; i++) {
233 const struct rp1_iobank_desc *bank = &rp1_iobanks[i];
234 int j;
235 @@ -1547,14 +1620,17 @@ static int rp1_pinctrl_probe(struct plat
236 pin->rio = pc->rio_base + bank->rio_offset;
237 pin->pad = pc->pads_base + bank->pads_offset +
238 j * sizeof(u32);
239 + pin->dummy = pc->dummy_base ? pc->dummy_base + 0xc0 : NULL;
240 }
241
242 raw_spin_lock_init(&pc->irq_lock[i]);
243 }
244
245 pc->pctl_dev = devm_pinctrl_register(dev, &rp1_pinctrl_desc, pc);
246 - if (IS_ERR(pc->pctl_dev))
247 - return PTR_ERR(pc->pctl_dev);
248 + if (IS_ERR(pc->pctl_dev)) {
249 + err = PTR_ERR(pc->pctl_dev);
250 + goto out_iounmap;
251 + }
252
253 girq = &pc->gpio_chip.irq;
254 girq->chip = &rp1_gpio_irq_chip;
255 @@ -1583,7 +1659,7 @@ static int rp1_pinctrl_probe(struct plat
256 err = devm_gpiochip_add_data(dev, &pc->gpio_chip, pc);
257 if (err) {
258 dev_err(dev, "could not add GPIO chip\n");
259 - return err;
260 + goto out_iounmap;
261 }
262
263 pc->gpio_range = rp1_pinctrl_gpio_range;
264 @@ -1592,10 +1668,24 @@ static int rp1_pinctrl_probe(struct plat
265 pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range);
266
267 return 0;
268 +
269 +out_iounmap:
270 + if (pc->dummy_base)
271 + iounmap(pc->dummy_base);
272 + return err;
273 +}
274 +
275 +static void rp1_pinctrl_remove(struct platform_device *pdev)
276 +{
277 + struct rp1_pinctrl *pc = platform_get_drvdata(pdev);
278 +
279 + if (pc->dummy_base)
280 + iounmap(pc->dummy_base);
281 }
282
283 static struct platform_driver rp1_pinctrl_driver = {
284 .probe = rp1_pinctrl_probe,
285 + .remove_new = rp1_pinctrl_remove,
286 .driver = {
287 .name = MODULE_NAME,
288 .of_match_table = rp1_pinctrl_match,