A news article scraped from an RSS feed or website.
tid
Timestamp-based ID
Properties
author
string
Optional
Author byline as published.
maxLength: 512 bytes
maxGraphemes: 128 graphemes
content
string
Optional
Full article body when the feed provides it (RSS <content:encoded>, Atom <content>, JSON Feed `content_html` / `content_text`). Powers Postgres FTS.
maxLength: 50000 bytes
maxGraphemes: 15000 graphemes
createdAt
string
datetime
Required
When this record was created in the AT Protocol network.
description
string
Optional
Short lead / summary only. Maps to RSS <description>, Atom <summary>, JSON Feed `summary`. No fallback to the full body — that lives in `content`.
maxLength: 5000 bytes
maxGraphemes: 1500 graphemes
feedCategory
string
Optional
DEPRECATED — equivalent to tags[0]. Kept for backward compatibility with consumers that read it; new consumers should read `tags`. Will be removed in a future revision.
maxLength: 256 bytes
maxGraphemes: 64 graphemes
guid
string
Optional
Original GUID from the RSS feed, used for deduplication.
maxLength: 1024 bytes
imageUrl
string
uri
Optional
URL of the article's featured image.
language
string
language
Optional
Language of the article content (BCP-47, e.g., 'es', 'en', 'pt-BR').
mediaCaption
string
Optional
Caption for imageUrl when the feed provides one (Google News <image:caption>, RSS Media <media:description>).
maxLength: 500 bytes
maxGraphemes: 200 graphemes
originalSource
object
Optional
Set when the feed declares the article was syndicated from another outlet (RSS <source url>, <dc:source>). Distinguishes legitimate republication (e.g., OEM regional papers carrying a Sol de México story) from accidental duplicates.
publishedAt
string
datetime
Required
When the article was first published by the source.
source
ref
com.atproto.repo.strongRef
Required
Strong reference to the tech.transparencia.news.source record for this outlet.
tags
array of string
Optional
All categories the source attached to the article (RSS <category>, <dc:subject>, Atom <category term>, Google News <news:keywords>, JSON Feed `tags`).
maxLength: 50 items
title
string
Required
Original headline of the article as published by the source.
maxLength: 2048 bytes
maxGraphemes: 512 graphemes
updatedAt
string
datetime
Optional
When the source last edited the article (RSS <dcterms:modified>, Atom <updated> when distinct from publish time, JSON Feed `date_modified`).
url
string
uri
Required
Canonical URL of the article on the source website.
View raw schema
{
  "key": "tid",
  "type": "record",
  "record": {
    "type": "object",
    "required": [
      "title",
      "url",
      "source",
      "publishedAt",
      "createdAt"
    ],
    "properties": {
      "url": {
        "type": "string",
        "format": "uri",
        "description": "Canonical URL of the article on the source website."
      },
      "guid": {
        "type": "string",
        "maxLength": 1024,
        "description": "Original GUID from the RSS feed, used for deduplication."
      },
      "tags": {
        "type": "array",
        "items": {
          "type": "string",
          "maxLength": 200,
          "maxGraphemes": 64
        },
        "maxLength": 50,
        "description": "All categories the source attached to the article (RSS <category>, <dc:subject>, Atom <category term>, Google News <news:keywords>, JSON Feed `tags`)."
      },
      "title": {
        "type": "string",
        "maxLength": 2048,
        "description": "Original headline of the article as published by the source.",
        "maxGraphemes": 512
      },
      "author": {
        "type": "string",
        "maxLength": 512,
        "description": "Author byline as published.",
        "maxGraphemes": 128
      },
      "source": {
        "ref": "com.atproto.repo.strongRef",
        "type": "ref",
        "description": "Strong reference to the tech.transparencia.news.source record for this outlet."
      },
      "content": {
        "type": "string",
        "maxLength": 50000,
        "description": "Full article body when the feed provides it (RSS <content:encoded>, Atom <content>, JSON Feed `content_html` / `content_text`). Powers Postgres FTS.",
        "maxGraphemes": 15000
      },
      "imageUrl": {
        "type": "string",
        "format": "uri",
        "description": "URL of the article's featured image."
      },
      "language": {
        "type": "string",
        "format": "language",
        "description": "Language of the article content (BCP-47, e.g., 'es', 'en', 'pt-BR')."
      },
      "createdAt": {
        "type": "string",
        "format": "datetime",
        "description": "When this record was created in the AT Protocol network."
      },
      "updatedAt": {
        "type": "string",
        "format": "datetime",
        "description": "When the source last edited the article (RSS <dcterms:modified>, Atom <updated> when distinct from publish time, JSON Feed `date_modified`)."
      },
      "description": {
        "type": "string",
        "maxLength": 5000,
        "description": "Short lead / summary only. Maps to RSS <description>, Atom <summary>, JSON Feed `summary`. No fallback to the full body — that lives in `content`.",
        "maxGraphemes": 1500
      },
      "publishedAt": {
        "type": "string",
        "format": "datetime",
        "description": "When the article was first published by the source."
      },
      "feedCategory": {
        "type": "string",
        "maxLength": 256,
        "description": "DEPRECATED — equivalent to tags[0]. Kept for backward compatibility with consumers that read it; new consumers should read `tags`. Will be removed in a future revision.",
        "maxGraphemes": 64
      },
      "mediaCaption": {
        "type": "string",
        "maxLength": 500,
        "description": "Caption for imageUrl when the feed provides one (Google News <image:caption>, RSS Media <media:description>).",
        "maxGraphemes": 200
      },
      "originalSource": {
        "type": "object",
        "properties": {
          "url": {
            "type": "string",
            "format": "uri"
          },
          "name": {
            "type": "string",
            "maxLength": 256,
            "maxGraphemes": 100
          }
        },
        "description": "Set when the feed declares the article was syndicated from another outlet (RSS <source url>, <dc:source>). Distinguishes legitimate republication (e.g., OEM regional papers carrying a Sol de México story) from accidental duplicates."
      }
    }
  },
  "description": "A news article scraped from an RSS feed or website."
}