Adding Files to sitemap.xml
The sitemap.xml file helps search engines index your content. You probably already use the TYPO3 SEO extension to index your pages and, if you use the news plugin, your news items too. I have a lot of PDF files, and I want them to be indexed as well. The documentation of the SEO extension describes how to write your own item provider for sitemap.xml, and in fact it is easy to do. I decided to add the resulting class to my site package, so it resides in the subdirectory Classes/Seo and the file is called FilesXMLSitemapDataProvider.php.
You have to edit your config.typoscript to use your data provider. filetypes takes a comma-separated list of file extensions. folders takes a comma-separated list of folders, and recursive should only be set if subfolders are to be searched too. Please note that the extension does not scan the file system; it uses the TYPO3 FAL (File Abstraction Layer). Mine looks like this:
Template setup snippet
// XML sitemap setup for EXT:seo: restricts the standard "pages" sitemap and
// registers an additional sitemap "pdffiles" that is filled by a custom provider.
plugin.tx_seo {
config {
xmlSitemap {
sitemaps {
pages {
config {
// only list pages whose no_index flag is 0
additionalWhere = AND no_index = 0
}
}
pdffiles {
// use and configure my own provider
provider = Lespf\Phasenumkehrpackage\Seo\FilesXMLSitemapDataProvider
config {
// comma-separated file extensions; the provider compares them case-insensitively
filetypes = pdf
// also descend into subfolders; omit this key to scan only the listed folders
recursive = true
// comma-separated folder names; the provider looks them up below its
// /phasenumkehr/ base folder in the default file storage
folders = pharma, computing
}
}
}
}
}
}
The DataProvider Class
The class itself is not perfect, but it should be easy to adapt. Shame on me, it even contains a hard-coded path name!
FilesXMLSitemapDataProvider.php
<?php
declare(strict_types=1);
/*
* This file is part of the "phasenumkehrpackage" Extension for TYPO3 CMS.
*
* License is the same as the TYPO3 core license.
*/
namespace Lespf\Phasenumkehrpackage\Seo;
/* file access */
use TYPO3\CMS\Core\Resource\Exception\InsufficientFolderAccessPermissionsException;
use TYPO3\CMS\Core\Resource\File;
use TYPO3\CMS\Core\Resource\Folder;
use TYPO3\CMS\Core\Resource\InaccessibleFolder;
use TYPO3\CMS\Core\Resource\StorageRepository;
/* xml indexing */
use Psr\Http\Message\ServerRequestInterface;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer;
use TYPO3\CMS\Seo\XmlSitemap\AbstractXmlSitemapDataProvider;
use TYPO3\CMS\Seo\XmlSitemap\Exception\MissingConfigurationException;
/**
 * XML sitemap data provider that lists files found through the TYPO3 File
 * Abstraction Layer (default storage) instead of database records.
 *
 * Options read from the sitemap's `config` array:
 *  - filetypes (required): comma-separated file extensions, compared case-insensitively.
 *  - folders (required): comma-separated folder names, resolved below the base folder.
 *  - recursive (optional): when present, subfolders are scanned too, unless the
 *    value is an explicit negative ("0", "false", "no", "off").
 *  - baseFolder (optional): folder identifier inside the storage that `folders`
 *    is relative to; defaults to "/phasenumkehr/".
 *  - publicPathPrefix (optional): prefix put in front of the storage-relative
 *    path to build `loc`; defaults to "/fileadmin".
 *
 * NOTE(review): the class name is spelled "XMl" while the file name and the
 * TypoScript registration use "XML". PHP resolves class names case-insensitively,
 * but class-map based autoloading may not — confirm before relying on it.
 */
class FilesXMlSitemapDataProvider extends AbstractXmlSitemapDataProvider
{
    /** Base folder used when the `baseFolder` option is not configured. */
    private const DEFAULT_BASE_FOLDER = '/phasenumkehr/';

    /** URL path prefix used when the `publicPathPrefix` option is not configured. */
    private const DEFAULT_PUBLIC_PATH_PREFIX = '/fileadmin';

    /** Values of the `recursive` option that switch recursion off explicitly. */
    private const NEGATIVE_FLAGS = ['0', 'false', 'no', 'off'];

    /**
     * The number of all elements
     *
     * @var int
     */
    protected $itemCount = 0;

    /**
     * The youngest file's modification date (Unix timestamp, 0 if there are no items)
     *
     * @var int
     */
    protected $latestModified = 0;

    private ?StorageRepository $storageRepository = null;

    /** @var array<string, int> accepted extensions (lower case) used as a lookup set */
    private array $filetypes = [];

    private bool $recursive = false;

    /** Folder identifier (leading and trailing slash) the configured folders live in. */
    private string $baseFolder = self::DEFAULT_BASE_FOLDER;

    /** Prefix for `loc`, without trailing slash. */
    private string $publicPathPrefix = self::DEFAULT_PUBLIC_PATH_PREFIX;

    /** True once generateItems() has run, so an empty result is not rebuilt on every read. */
    private bool $itemsGenerated = false;

    /**
     * @param ServerRequestInterface $request
     * @param string $key
     * @param array $config
     * @param ContentObjectRenderer|null $cObj
     * @throws MissingConfigurationException if `filetypes` or `folders` is not configured
     */
    public function __construct(
        ServerRequestInterface $request,
        string $key,
        array $config = [],
        ?ContentObjectRenderer $cObj = null
    ) {
        parent::__construct($request, $key, $config, $cObj);

        // Fail with a readable message instead of a TypeError inside explode().
        foreach (['filetypes', 'folders'] as $option) {
            if (!isset($this->config[$option])) {
                throw new MissingConfigurationException(
                    'Option "' . $option . '" is missing for sitemap ' . $key,
                    1700000001
                );
            }
        }

        $this->storageRepository = GeneralUtility::makeInstance(StorageRepository::class);

        foreach (explode(',', (string)$this->config['filetypes']) as $type) {
            $type = strtolower(trim($type));
            // A trailing comma must not register '' and thereby match extension-less files.
            if ($type !== '') {
                $this->filetypes[$type] = 1;
            }
        }

        // Presence of the key enables recursion (as before); only an explicit
        // negative value such as "recursive = 0" disables it.
        $this->recursive = array_key_exists('recursive', $this->config)
            && !in_array(
                strtolower(trim((string)$this->config['recursive'])),
                self::NEGATIVE_FLAGS,
                true
            );

        // Normalise to exactly one leading and one trailing slash ('' becomes '/').
        $baseFolder = trim((string)($this->config['baseFolder'] ?? self::DEFAULT_BASE_FOLDER));
        $this->baseFolder = rtrim('/' . trim($baseFolder, '/'), '/') . '/';

        $prefix = trim((string)($this->config['publicPathPrefix'] ?? self::DEFAULT_PUBLIC_PATH_PREFIX));
        $this->publicPathPrefix = rtrim($prefix, '/');

        $this->generateItems();
    }

    /**
     * Recompute item count and newest modification time from the collected items.
     */
    private function adjustStats(): void
    {
        $this->itemCount = count($this->items);
        $this->latestModified = 0;
        foreach ($this->items as $item) {
            $this->latestModified = max($this->latestModified, (int)$item['lastMod']);
        }
    }

    /**
     * Tell whether the file's extension is one of the configured file types.
     */
    private function fileMeetsRequirements(File $file): bool
    {
        return array_key_exists(strtolower($file->getExtension()), $this->filetypes);
    }

    /**
     * Append all matching files of a folder (and, if enabled, of its subfolders)
     * to the item list. A folder that may not be read is skipped; items collected
     * from other folders are kept.
     */
    private function parseFolder(Folder $folder): void
    {
        try {
            // NOTE(review): `loc` stays a site-relative path as before; the sitemap
            // protocol expects absolute URLs — confirm how the template renders it.
            $publicFolderPath = $this->publicPathPrefix . $folder->getReadablePath();

            /** @var File[] $files */
            $files = $folder->getFiles();
            foreach ($files as $file) {
                if (!$this->fileMeetsRequirements($file)) {
                    continue;
                }
                $this->items[] = [
                    'loc' => $publicFolderPath . $file->getName(),
                    'lastMod' => $file->getModificationTime(),
                ];
            }

            $subfolders = $this->recursive ? $folder->getSubfolders() : [];
        } catch (InsufficientFolderAccessPermissionsException $e) {
            // Only this folder is unreadable: leave what was collected elsewhere intact.
            return;
        }

        foreach ($subfolders as $subfolder) {
            $this->parseFolder($subfolder);
        }
    }

    /**
     * (Re)build the item list from the configured folders of the default storage.
     *
     * Folders that do not exist or may not be read are skipped individually, so
     * one bad entry does not empty the whole sitemap. Empty entries in `folders`
     * (e.g. from a trailing comma) are ignored.
     */
    public function generateItems(): void
    {
        $this->items = [];
        $this->itemCount = 0;
        $this->latestModified = 0;
        $this->itemsGenerated = true;

        if ($this->storageRepository === null) {
            return;
        }
        $defaultStorage = $this->storageRepository->getDefaultStorage();
        if ($defaultStorage === null) {
            // No default storage configured: nothing to list.
            return;
        }

        foreach (explode(',', (string)($this->config['folders'] ?? '')) as $folderPartial) {
            $folderName = trim(trim($folderPartial), '/');
            if ($folderName === '') {
                continue;
            }
            try {
                /** @var Folder|InaccessibleFolder $folder */
                $folder = $defaultStorage->getFolder($this->baseFolder . $folderName . '/');
            } catch (
                InsufficientFolderAccessPermissionsException
                | \TYPO3\CMS\Core\Resource\Exception\FolderDoesNotExistException $e
            ) {
                continue;
            }
            $this->parseFolder($folder);
        }

        $this->adjustStats();
    }

    /**
     * Get the items of the requested sitemap page.
     *
     * The page number is read from the request's query parameters; both the
     * plain `page` parameter and the namespaced `tx_seo[page]` form are accepted.
     * Without a page parameter the first page is returned.
     *
     * @return array
     */
    public function getItems(): array
    {
        if (!$this->itemsGenerated) {
            $this->generateItems();
        }

        $queryParams = $this->request->getQueryParams();
        $page = max(0, (int)($queryParams['tx_seo']['page'] ?? $queryParams['page'] ?? 0));
        $perPage = max(1, (int)$this->numberOfItemsPerPage);

        // Must agree with getNumberOfPages(); otherwise every page repeats all items.
        return array_slice($this->items, $page * $perPage, $perPage);
    }

    /**
     * Get the number of pages
     *
     * @return int
     */
    public function getNumberOfPages(): int
    {
        return (int)ceil($this->itemCount / max(1, (int)$this->numberOfItemsPerPage));
    }

    /**
     * Get the newest modification time of all listed files (0 if there are none).
     */
    public function getLastModified(): int
    {
        return $this->latestModified;
    }
}