File size: 5,591 Bytes
d2897cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
<?php

declare(strict_types=1);

namespace Mautic\LeadBundle\Command;

use Mautic\CoreBundle\Service\ProcessQueue;
use Mautic\LeadBundle\Deduplicate\ContactDeduper;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
use Symfony\Component\Process\Process;
use Symfony\Component\Stopwatch\Stopwatch;

/**
 * Console command that looks up contacts sharing the same unique identifier values
 * and merges them by spawning DeduplicateIdsCommand sub-processes, one per batch of
 * contact IDs, through a ProcessQueue.
 */
class DeduplicateCommand extends Command
{
    public const NAME = 'mautic:contacts:deduplicate';

    // Left untyped on purpose: the property is inherited from the Symfony Command base class.
    protected static $defaultDescription = 'Merge contacts based on same unique identifiers';

    public function __construct(
        private ContactDeduper $contactDeduper,
        private ParameterBagInterface $params
    ) {
        parent::__construct();
    }

    /**
     * Registers the command name, its options and the help text.
     */
    public function configure(): void
    {
        parent::configure();

        // Option names are given without the leading "--"; Symfony strips that prefix anyway.
        $this->setName(self::NAME)
            ->addOption(
                'newer-into-older',
                null,
                InputOption::VALUE_NONE,
                'By default, this command will merge older contacts and activity into the newer. Use this flag to reverse that behavior.'
            )
            ->addOption(
                'batch',
                null,
                InputOption::VALUE_REQUIRED,
                'How many contact duplicates to process at once. Defaults to 100.',
                100
            )
            ->addOption(
                'processes',
                null,
                InputOption::VALUE_REQUIRED,
                'The commands can run in multiple PHP processes. This option defines how many processes to run. Defaults to 1.',
                1
            )
            ->setHelp(
                <<<'EOT'
The <info>%command.name%</info> command will deduplicate contacts based on unique identifier values.

<info>php %command.full_name%</info>
EOT
            );
    }

    /**
     * Splits the duplicate contact IDs into batches, runs one sub-process per batch and
     * prints the output of every sub-process once all of them have finished.
     *
     * @return int Command::FAILURE when an option is invalid or there is nothing to deduplicate,
     *             Command::SUCCESS otherwise (the exit codes of the sub-processes are only
     *             reported in the output, they do not influence the return value)
     *
     * @throws \JsonException when the environment parameters cannot be JSON encoded
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $newerIntoOlder = (bool) $input->getOption('newer-into-older');
        $batch          = (int) $input->getOption('batch');
        $processes      = (int) $input->getOption('processes');

        // A batch size below 1 would trigger a division by zero below and a ValueError in
        // array_chunk(); a process count below 1 makes no sense either. Reject both up front.
        if ($batch < 1 || $processes < 1) {
            $output->writeln('<error>The --batch and --processes options must be integers greater than 0.</error>');

            return Command::FAILURE;
        }

        $uniqueFields     = $this->contactDeduper->getUniqueFields('lead');
        $uniqueFieldNames = array_keys($uniqueFields);
        $duplicateCount   = $this->contactDeduper->countDuplicatedContacts($uniqueFieldNames);

        if (!$duplicateCount) {
            $output->writeln('<error>No contacts to deduplicate.</error>');

            return Command::FAILURE;
        }

        $stopwatch = new Stopwatch();
        $stopwatch->start('deduplicate');

        $output->writeln('Deduplicating contacts based on unique identifiers: '.implode(', ', $uniqueFields));
        $output->writeln("{$duplicateCount} contacts found to deduplicate");

        $processQueue = new ProcessQueue($processes);

        // Announced before the IDs are fetched, so this number is derived from the count query.
        $processCount = (int) ceil($duplicateCount / $batch);

        $output->writeln('');
        $output->writeln("Finding duplicates and creating processes for deduplication. {$processCount} processes will be queued.");

        // These values are identical for every sub-process, so they are built once.
        $consolePath = $this->params->get('kernel.project_dir').'/bin/console';
        $processEnv  = [
            'MAUTIC_CONFIG_PARAMETERS' => json_encode(
                [
                    'db_table_prefix'                     => MAUTIC_TABLE_PREFIX,
                    'contact_unique_identifiers_operator' => $this->params->get('mautic.contact_unique_identifiers_operator'),
                ],
                JSON_THROW_ON_ERROR
            ),
        ];

        $contactIds      = $this->contactDeduper->getDuplicateContactIds($uniqueFieldNames);
        $contactIdChunks = array_chunk($contactIds, $batch);

        foreach ($contactIdChunks as $contactIdBatch) {
            $command = [
                $consolePath,
                DeduplicateIdsCommand::NAME,
                '--contact-ids',
                implode(',', $contactIdBatch),
                '-e',
                MAUTIC_ENV,
            ];

            if ($newerIntoOlder) {
                $command[] = '--newer-into-older';
            }

            $processQueue->enqueue(new Process($command, null, $processEnv));
        }

        // From here on use the number of processes that were really queued, so the message
        // and the progress bar stay correct even if it differs from the earlier estimate.
        $queuedCount = count($contactIdChunks);

        $output->writeln('');
        $output->writeln("Starting to execute the {$queuedCount} processes for deduplication. {$processes} processes will be executed in parallel.");

        $progressBar = new ProgressBar($output, $queuedCount);
        $progressBar->setFormat('debug');
        $progressBar->start();

        $processQueue->refresh();

        // Poll the queue until every process has finished, updating the progress bar on each pass.
        // NOTE(review): usleep() takes microseconds, so this polls every 0.1 ms — confirm that is intended.
        while ($processQueue->isProcessing()) {
            usleep(100);
            $processQueue->refresh();
            $progressBar->setProgress($processQueue->getProcessedCount());
        }

        // Finish the bar before printing anything else so it is not redrawn below the results.
        $progressBar->finish();

        $output->writeln('');
        $output->writeln('');
        $output->writeln('All processes have finished. The output of each process is below.');

        foreach ($processQueue->getProcessed() as $process) {
            $output->writeln("<comment>{$process->getCommandLine()}</comment>");

            if (0 === $process->getExitCode()) {
                $output->writeln("<info>{$process->getOutput()}</info>");
            } else {
                $output->writeln("<error>{$process->getErrorOutput()}</error>");
            }
        }

        $event = $stopwatch->stop('deduplicate');
        $output->writeln('');
        $output->writeln("Duration: {$event->getDuration()} ms, Memory: {$event->getMemory()} bytes");

        return Command::SUCCESS;
    }
}