Author:
Hash:
Timestamp:
+163 -37 +/-2 browse
Kevin Schoon [me@kevinschoon.com]
93302241e2a312742c4e60d69d5c375c20a9ab40
Thu, 05 Jun 2025 12:41:27 +0000 (2 days ago)
1 | diff --git a/README.md b/README.md |
2 | index af7bf76..405be98 100644 |
3 | --- a/README.md |
4 | +++ b/README.md |
5 | @@ -11,12 +11,32 @@ anywhere.* |
6 | |
7 | A repository URI provides a unique identifier for referencing a repository |
8 | that is present in a given code forge. This value is similar to |
9 | - [RFC7565](https://datatracker.ietf.org/doc/html/rfc7565). |
10 | + [RFC7565](https://datatracker.ietf.org/doc/html/rfc7565). The slug |
11 | + part MUST match the URI path specification as defined in |
12 | + [RFC3986-3.3](https://www.rfc-editor.org/rfc/rfc3986#section-3.3) while the |
13 | + hostname, if specified, MUST match [RFC3986-3.2.2](https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2). |
14 | |
15 | - repository-uri = "repository:"<slug>[@hostname] |
16 | + repository-uri = prefix slug hostname |
17 | + |
18 | + prefix = "repository:" |
19 | + slug = rfc3986-path |
20 | + hostname = ["@" rfc3986-hostname] |
21 | + |
22 | + ### Slug |
23 | + |
24 | + The Slug represents a unique string that identifies a repository at a |
25 | + particular code forge. Repository refers to a VCS managed codebase of some |
26 | + kind, e.g. a Git repository. |
27 | + |
28 | + ### Hostname |
29 | |
30 | If the hostname part is missing then the address of the server receiving the |
31 | - query is assumed. |
32 | + query is assumed. For example the following two queries are equivalent: |
33 | + |
34 | + ```text |
35 | + https://example.org/.well-known/webfinger?resource=repository:example/spartacus |
36 | + https://example.org/.well-known/webfinger?resource=repository:example/spartacus@example.org |
37 | + ``` |
38 | |
39 | TODO: Make this an actual spec, for now, some Python: |
40 | |
41 | @@ -44,7 +64,7 @@ def to_string(slug, domain=None): |
42 | |
43 | A [WebFinger](https://webfinger.net/spec/) query may be used to identify |
44 | detailed information about a public repository at a particular forge. Here is |
45 | - an example response about a ficticious repository: |
46 | + an example response about a fictitious repository: |
47 | |
48 | `GET https://example.org/.well-known/webfinger?resource=repository:example/spartacus` |
49 | |
50 | @@ -54,10 +74,6 @@ an example response about a ficticious repository: |
51 | "aliases": [ |
52 | "https://example.org/example/spartacus" |
53 | ], |
54 | - "properties": |
55 | - { |
56 | - "http://forge-feed.org/rel/vcs-type": "git" |
57 | - }, |
58 | "links": [ |
59 | { |
60 | "rel": "http://webfinger.net/rel/avatar", |
61 | @@ -80,17 +96,20 @@ an example response about a ficticious repository: |
62 | { |
63 | "rel": "http://forge-feed.org/rel/chatroom", |
64 | "href": "ircs://irc.libera.chat/#spartacus-game", |
65 | + "properties": { |
66 | + "http://feed-forge.org/ns/chatroom": "irc" |
67 | + } |
68 | }, |
69 | { |
70 | "rel": "http://forge-feed.org/rel/label", |
71 | "properties": { |
72 | - "label": "fortran" |
73 | + "http://feed-forge.org/ns/label": "fortran" |
74 | } |
75 | }, |
76 | { |
77 | "rel": "http://forge-feed.org/rel/label", |
78 | "properties": { |
79 | - "label": "text-adventure" |
80 | + "http://feed-forge.org/ns/label": "text-adventure" |
81 | } |
82 | } |
83 | ] |
84 | @@ -101,6 +120,8 @@ an example response about a ficticious repository: |
85 | |
86 | #### http://feed-forge.org/ns/vcs-type |
87 | |
88 | + Identifies VCS types, valid strings are: |
89 | + |
90 | bzr (GNU Bazaar) bazaar.canonical.com |
91 | darcs (Darcs) darcs.net |
92 | fossil (Fossil) fossil-scm.org |
93 | @@ -109,8 +130,30 @@ an example response about a ficticious repository: |
94 | pijul (Pijul) pijul.org |
95 | svn (Apache Subversion) subversion.apache.org |
96 | |
97 | + #### http://feed-forge.org/ns/chatroom |
98 | + |
99 | + Hint describing the backing type of chatroom. See [uri-schemes](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml). |
100 | + |
101 | + matrix |
102 | + irc |
103 | + xmpp |
104 | + |
105 | + #### http://feed-forge.org/ns/spdx-identifier |
106 | + |
107 | + Refers to a valid SPDX identifier, see [license-list](https://spdx.org/licenses/) |
108 | + |
109 | ### Link Extension Types |
110 | |
111 | + #### Avatar |
112 | + |
113 | + Forges that allow users to configure a logo can expose this information as |
114 | + an avatar for use in other applications. |
115 | + |
116 | + { |
117 | + "rel": "http://webfinger.net/rel/avatar", |
118 | + "href": "https://example.org/stylized-logo.png" |
119 | + }, |
120 | + |
121 | #### Homepage |
122 | |
123 | Link to an HTTP representation of the project codebase |
124 | @@ -140,7 +183,7 @@ A license SPDX identifier and link to the license's full text. |
125 | "rel": "http://feed-forge.org/rel/license", |
126 | "href": "https://example.com/example/spartacus/tree/LICENSE", |
127 | "properties": { |
128 | - "spdx-identifier": "GPL-2.0-or-later" |
129 | + "http://feed-forge.org/ns/spdx-identifier": "GPL-2.0-or-later" |
130 | } |
131 | } |
132 | |
133 | @@ -148,15 +191,45 @@ A license SPDX identifier and link to the license's full text. |
134 | |
135 | Links to chatrooms: IRC, Matrix, XMPP, etc. |
136 | |
137 | + ```json |
138 | { |
139 | "rel": "http://feed-forge.org/rel/chatroom", |
140 | "href": "ircs://irc.libera.chat/#spartacus-game", |
141 | + "properties": { |
142 | + "http://feed-forge.org/ns/chatroom": "irc" |
143 | + } |
144 | }, |
145 | + { |
146 | + "rel": "http://feed-forge.org/rel/chatroom", |
147 | + "href": "matrix:r/spartacus-game#example.org", |
148 | + "properties": { |
149 | + "http://feed-forge.org/ns/chatroom": "matrix" |
150 | + } |
151 | + }, |
152 | + { |
153 | + "rel": "http://feed-forge.org/rel/chatroom", |
154 | + "href": "xmpp:example.org/spartacus-game", |
155 | + "properties": { |
156 | + "http://feed-forge.org/ns/chatroom": "xmpp" |
157 | + } |
158 | + } |
159 | + ``` |
160 | |
161 | #### Mailing Lists |
162 | |
163 | Links to associated mailing lists, forms, etc. |
164 | |
165 | + ```json |
166 | + { |
167 | + "rel": "http://feed-forge.org/rel/mailing-list", |
168 | + "href": "mailto:list-name@mail.example.org", |
169 | + "properties": { |
170 | + "http://feed-forge.org/ns/mailing-list-subscribe": "mailto:subscribe+list-name@mail.example.org", |
171 | + "http://feed-forge.org/ns/mailing-list-unsubscribe": "mailto:unsubscribe+list-name@mail.example.org" |
172 | + } |
173 | + } |
174 | + ``` |
175 | + |
176 | |
177 | #### Free-Form Tags |
178 | |
179 | @@ -165,7 +238,7 @@ Arbitrary, free-form tags. |
180 | { |
181 | "rel": "http://example.org/rel/label", |
182 | "properties": { |
183 | - "label": "fortran" |
184 | + "http://feed-forge.org/ns/label": "fortran" |
185 | } |
186 | }, |
187 | { |
188 | @@ -174,6 +247,20 @@ Arbitrary, free-form tags. |
189 | "label": "text-adventure" |
190 | } |
191 | } |
192 | + |
193 | + ### Security |
194 | + |
195 | + Repositories which are not publicly available should not be identifiable by |
196 | + making webfinger queries at all. A repository which is private MUST return |
197 | + the same response as a repository which does not exist when making a webfinger |
198 | + request. |
199 | + |
200 | + GET https://example.org/.well-known/webfinger?resource=repository:example/spartacus |
201 | + 200 |
202 | + GET https://example.org/.well-known/webfinger?resource=repository:example/private-repository |
203 | + 404 |
204 | + GET https://example.org/.well-known/webfinger?resource=repository:example/non-existent-repository |
205 | + 404 |
206 | |
207 | # RSS Feeds |
208 | |
209 | @@ -185,24 +272,33 @@ activity is likely by your VCS's concept of a "commit". |
210 | |
211 | ## Determine if a Host Supports Forge Feed |
212 | |
213 | - Forges participating in the ForgeFeed protocol MUST respond to an HTTP |
214 | - GET request such as below: |
215 | - |
216 | - GET https://example.org/.well-known/forge-feed/firehose.xml |
217 | - |
218 | - Responses |
219 | - |
220 | - ### HTTP 200 |
221 | - |
222 | - This indicates the feed exists and the server will return the feed XML content directly. TODO: Is this valid for .well-known? |
223 | - |
224 | - ### HTTP 301 |
225 | - |
226 | - The server MAY respond with a 301 indicating a different location however the resource MUST reside on the server which the well-known request was made against. |
227 | - |
228 | - ### HTTP 501 |
229 | + In order to participate in ForgeFeed your forge MUST present an HTML link |
230 | + element such as below at the root domain of your forge. For example, |
231 | + `code.example.org` MUST have a link element present in its HTML header: |
232 | + |
233 | + ```html |
234 | + <html> |
235 | + <head> |
236 | + <title>My Forge</title> |
237 | + <link |
238 | + rel="alternate" |
239 | + type="application/rss+xml" |
240 | + title="Recent Forge Activity" |
241 | + href="https://code.example.org/firehose.xml" /> |
242 | + <meta name="forge-feed:index" content="enabled"/> |
243 | + </head> |
244 | + <body> |
245 | + ... |
246 | + </body> |
247 | + </html> |
248 | + ``` |
249 | |
250 | - This indicates the server no longer wishes to be indexed and that the operator of a forge index should cease crawling operations. |
251 | + The link SHOULD have a title of "Recent Forge Activity" such that it uniquely |
252 | + identifies this feed as being related to forge updates. A server SHOULD also |
253 | + provide a meta tag with the name `forge-feed:index` which may be toggled |
254 | + with `content="enabled"` or `content="disabled"`. If this tag is present and |
255 | + enabled then global index servers will periodically crawl the content here for |
256 | + discovery; if it is disabled then servers will remove it from their schedule. |
257 | |
258 | ### An example Feed |
259 | |
260 | @@ -252,6 +348,16 @@ about code repositories hosted on your server. |
261 | If the host section of the URI is included then it MUST match the domain name |
262 | of the server which is providing the feed. |
263 | |
264 | + ### Security Concerns |
265 | + |
266 | + #### Private Repositories |
267 | + |
268 | + Forge-feed enabled RSS feeds have no support for sharing private repositories |
269 | + and any repository that is not shared publicly on the internet must be hidden |
270 | + from the RSS activity feed stream. If your forge provides the ability to change |
271 | + a repository from public to private it must be understood that clients may |
272 | + already have cached versions of your repository data. |
273 | + |
274 | ### Recommendations for Enumerating Repository Events |
275 | |
276 | #### Specify a Maximum Timeframe |
277 | @@ -259,6 +365,11 @@ of the server which is providing the feed. |
278 | Your activity feed should not include repository events that are older than 1 |
279 | week. |
280 | |
281 | + #### Repository Items SHOULD be Unique |
282 | + |
283 | + Although it is permitted to return duplicate repositories in the feed, forges |
284 | + SHOULD only return unique repositories in a given window. |
285 | + |
286 | #### Event "Clamping" |
287 | |
288 | It may be undesirable to enumerate repository events items with the simple |
289 | @@ -267,16 +378,28 @@ heuristic of |
290 | end = time.now() |
291 | start = end - 6h |
292 | |
293 | + events = repository_events_between(start, end) |
294 | + |
295 | Because the oldest repository events will fall out of the time window on |
296 | - subsequent queries by RSS readers. Instead events can be "clamped" within a |
297 | - certain time period. For example you may choose to publish four buckets of |
298 | - updates per day: |
299 | + subsequent queries by RSS readers. This can cause some RSS readers to |
300 | + frequently request new content from your feed. Instead events can be |
301 | + "clamped" within a certain time period. For example you may choose to |
302 | + publish four buckets of updates per day: |
303 | |
304 | 00:00:00 06:00:00 |
305 | 06:00:00 12:00:00 |
306 | 12:00:00 18:00:00 |
307 | 18:00:00 00:00:00 |
308 | |
309 | - # Forge Discovery Index |
310 | + For example if the current time is 2021-03-05 13:00:55 UTC you could use |
311 | + the following heuristic to return a summary of repository events from the |
312 | + previous time bucket. |
313 | + |
314 | + now = time.now() |
315 | + start = now.set_time("06:00:00") |
316 | + end = now.set_time("12:00:00") |
317 | + |
318 | + events = repository_events_between(start, end) |
319 | |
320 | - TODO |
321 | + This approach with a TTL value of 60 (minutes) will reduce excess requests |
322 | + from some readers but allow all clients to receive timely updates. |
323 | diff --git a/repository-uri.py b/repository-uri.py |
324 | index dd4a254..26f49eb 100644 |
325 | --- a/repository-uri.py |
326 | +++ b/repository-uri.py |
327 | @@ -1,5 +1,5 @@ |
328 | - from urllib.parse import urlparse |
329 | - from urllib.parse import quote_plus |
330 | + from urllib.parse import urlparse, quote_plus |
331 | + |
332 | |
333 | def from_string(text): |
334 | url = urlparse(text) |
335 | @@ -10,6 +10,7 @@ def from_string(text): |
336 | return (split[0], split[1]) |
337 | return (split[0], None) |
338 | |
339 | + |
340 | def to_string(slug, domain=None): |
341 | if domain: |
342 | return quote_plus(f"repository:{slug}@{domain}") |
343 | @@ -20,6 +21,8 @@ def to_string(slug, domain=None): |
344 | print(from_string("repository:fuu/bar@example.org")) |
345 | print(from_string("repository:fuu/bar/baz/qux@example.org")) |
346 | print(from_string("repository:fuu/bar/baz/qux")) |
347 | + print(from_string("repository:~hello/world")) |
348 | + print(from_string("repository:~hello/world@example.org")) |
349 | + print(from_string("repository:~hello/world@example.org@aaaa")) |
350 | print(to_string("fuu/bar/baz", None)) |
351 | print(to_string("fuu/bar/baz", "example.org")) |
352 | - |