Proyectos de Subversion Moodle

Rev

| Última modificación | Ver Log |

Rev Autor Línea Nro. Línea
1 efrain 1
<?php
2
 
3
declare(strict_types=1);
4
 
5
namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
6
 
7
/**
8
 * @internal
9
 */
10
final class CachingStrategyFactory implements CachingStrategyFactoryInterface
{
    /**
     * Estimated memory footprint, in KB, of a single shared string held in memory.
     *
     * The estimate is derived from the following empirical measurements:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * The relationship is linear: storing one 1-byte character costs roughly 150 bytes.
     * Since a character may occupy up to 4 bytes, 600 bytes per character is used as a safe bound.
     * Cells hold about 20 characters on average (a purely empirical figure).
     *
     * The memory required to keep one shared string in memory is therefore:
     *   => 20 * 600 ≈ 12KB
     */
    public const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * Maximum number of shared strings written to a single temporary file.
     *
     * When the number of shared strings is too large to hold in memory, they can be written to
     * temporary files instead. Reading a string then loads the contents of the matching file
     * into memory, from which the string is retrieved quickly.
     * Creating the temporary files is not the bottleneck; loading their contents is.
     * The most recently loaded file stays in memory until a different file is needed, so this
     * approach performs best when the shared string indexes are sorted in the sheet data.
     * 10,000 was picked because it yields small files that load quickly.
     */
    public const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /**
     * @param MemoryLimit $memoryLimit Source of the memory limit used to pick a strategy
     */
    public function __construct(private readonly MemoryLimit $memoryLimit) {}

    /**
     * Picks the caching strategy best suited to the number of unique shared strings
     * and the amount of memory available.
     *
     * @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string   $tempFolder               Folder in which the temporary shared-string files are created
     *
     * @return CachingStrategyInterface In-memory strategy when deemed safe, file-based strategy otherwise
     */
    public function createBestCachingStrategy(?int $sharedStringsUniqueCount, string $tempFolder): CachingStrategyInterface
    {
        return $this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
            ? new InMemoryStrategy($sharedStringsUniqueCount)
            : new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Tells whether in-memory caching can be used safely for the given number of
     * unique shared strings and the amount of memory available.
     *
     * @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     */
    private function isInMemoryStrategyUsageSafe(?int $sharedStringsUniqueCount): bool
    {
        // An unknown number of shared strings rules out the "in memory" strategy.
        if (null === $sharedStringsUniqueCount) {
            return false;
        }

        $limitInKB = $this->memoryLimit->getMemoryLimitInKB();

        // -1 means the limit could not be read or is unlimited: do not trust it and
        // only allow in-memory caching for a small number of strings.
        if (-1 === (int) $limitInKB) {
            return $sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE;
        }

        $requiredInKB = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return $limitInKB > $requiredInKB;
    }
}